diff --git a/firebase-ai/api.txt b/firebase-ai/api.txt
index ba27e5682d8..f73c51d7112 100644
--- a/firebase-ai/api.txt
+++ b/firebase-ai/api.txt
@@ -154,6 +154,9 @@ package com.google.firebase.ai.java {
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(boolean enableInterruptions);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, boolean enableInterruptions);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler, kotlin.jvm.functions.Function2? transcriptHandler, boolean enableInterruptions);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture startAudioConversation(kotlin.jvm.functions.Function2? transcriptHandler, boolean enableInterruptions);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public abstract com.google.common.util.concurrent.ListenableFuture stopAudioConversation();
     method public abstract void stopReceiving();
     field public static final com.google.firebase.ai.java.LiveSessionFutures.Companion Companion;
@@ -174,6 +177,10 @@ package com.google.firebase.ai.type {
     ctor public AudioRecordInitializationFailedException(String message);
   }
 
+  public final class AudioTranscriptionConfig {
+    ctor public AudioTranscriptionConfig();
+  }
+
   public final class BlockReason {
     method public String getName();
     method public int getOrdinal();
@@ -839,7 +846,9 @@ package com.google.firebase.ai.type {
     ctor public LiveGenerationConfig.Builder();
     method public com.google.firebase.ai.type.LiveGenerationConfig build();
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setFrequencyPenalty(Float? frequencyPenalty);
+    method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setInputAudioTranscription(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setMaxOutputTokens(Integer? maxOutputTokens);
+    method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setOutputAudioTranscription(com.google.firebase.ai.type.AudioTranscriptionConfig? config);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setPresencePenalty(Float? presencePenalty);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setResponseModality(com.google.firebase.ai.type.ResponseModality? responseModality);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setSpeechConfig(com.google.firebase.ai.type.SpeechConfig? speechConfig);
@@ -847,7 +856,9 @@ package com.google.firebase.ai.type {
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopK(Integer? topK);
     method public com.google.firebase.ai.type.LiveGenerationConfig.Builder setTopP(Float? topP);
     field public Float? frequencyPenalty;
+    field public com.google.firebase.ai.type.AudioTranscriptionConfig? inputAudioTranscription;
     field public Integer? maxOutputTokens;
+    field public com.google.firebase.ai.type.AudioTranscriptionConfig? outputAudioTranscription;
     field public Float? presencePenalty;
     field public com.google.firebase.ai.type.ResponseModality? responseModality;
     field public com.google.firebase.ai.type.SpeechConfig? speechConfig;
@@ -865,14 +876,18 @@ package com.google.firebase.ai.type {
   }
 
   @com.google.firebase.ai.type.PublicPreviewAPI public final class LiveServerContent implements com.google.firebase.ai.type.LiveServerMessage {
-    ctor public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete);
+    ctor @Deprecated public LiveServerContent(com.google.firebase.ai.type.Content? content, boolean interrupted, boolean turnComplete, boolean generationComplete, com.google.firebase.ai.type.Transcription? inputTranscription, com.google.firebase.ai.type.Transcription? outputTranscription);
     method public com.google.firebase.ai.type.Content? getContent();
     method public boolean getGenerationComplete();
+    method public com.google.firebase.ai.type.Transcription? getInputTranscription();
     method public boolean getInterrupted();
+    method public com.google.firebase.ai.type.Transcription? getOutputTranscription();
     method public boolean getTurnComplete();
     property public final com.google.firebase.ai.type.Content? content;
     property public final boolean generationComplete;
+    property public final com.google.firebase.ai.type.Transcription? inputTranscription;
     property public final boolean interrupted;
+    property public final com.google.firebase.ai.type.Transcription? outputTranscription;
     property public final boolean turnComplete;
   }
 
@@ -909,6 +924,7 @@ package com.google.firebase.ai.type {
     method public suspend Object? sendVideoRealtime(com.google.firebase.ai.type.InlineData video, kotlin.coroutines.Continuation);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation);
     method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.coroutines.Continuation);
+    method @RequiresPermission(android.Manifest.permission.RECORD_AUDIO) public suspend Object? startAudioConversation(kotlin.jvm.functions.Function1? functionCallHandler = null, kotlin.jvm.functions.Function2? transcriptHandler = null, boolean enableInterruptions = false, kotlin.coroutines.Continuation);
     method public void stopAudioConversation();
     method public void stopReceiving();
   }
@@ -1235,6 +1251,11 @@ package com.google.firebase.ai.type {
     ctor public ToolConfig(com.google.firebase.ai.type.FunctionCallingConfig? functionCallingConfig);
   }
 
+  public final class Transcription {
+    method public String? getText();
+    property public final String? text;
+  }
+
   public final class UnknownException extends com.google.firebase.ai.type.FirebaseAIException {
   }
 
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/LiveGenerativeModel.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/LiveGenerativeModel.kt
index d5afca6b960..b0a1b541c6b 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/LiveGenerativeModel.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/LiveGenerativeModel.kt
@@ -111,7 +111,9 @@ internal constructor(
         modelName,
         config?.toInternal(),
         tools?.map { it.toInternal() },
-        systemInstruction?.toInternal()
+        systemInstruction?.toInternal(),
+        config?.inputAudioTranscription?.toInternal(),
+        config?.outputAudioTranscription?.toInternal()
       )
       .toInternal()
     val data: String = Json.encodeToString(clientMessage)
@@ -135,7 +137,7 @@ internal constructor(
     } catch (e: ClosedReceiveChannelException) {
       val reason = webSession?.closeReason?.await()
       val message =
-        "Channel was closed by the server.${if(reason!=null) " Details: ${reason.message}" else "" }"
+        "Channel was closed by the server.${if (reason != null) " Details: ${reason.message}" else ""}"
       throw ServiceConnectionHandshakeFailedException(message, e)
     }
   }
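
Taken together, the api.txt surface and the LiveGenerativeModel change above make transcription an opt-in, per-direction setting that rides along in the setup message. A minimal sketch of the intended call pattern; the entry point, backend, and model name are illustrative assumptions, not taken from this diff:

```kotlin
import com.google.firebase.Firebase
import com.google.firebase.ai.ai
import com.google.firebase.ai.type.AudioTranscriptionConfig
import com.google.firebase.ai.type.GenerativeBackend
import com.google.firebase.ai.type.LiveGenerationConfig
import com.google.firebase.ai.type.ResponseModality

suspend fun connectWithTranscription() {
  // The presence of an AudioTranscriptionConfig is what enables transcription;
  // the class carries no options of its own.
  val config =
    LiveGenerationConfig.Builder()
      .setResponseModality(ResponseModality.AUDIO)
      .setInputAudioTranscription(AudioTranscriptionConfig())
      .setOutputAudioTranscription(AudioTranscriptionConfig())
      .build()

  // Entry point and model name are assumptions for the sketch.
  val model =
    Firebase.ai(backend = GenerativeBackend.googleAI())
      .liveModel("gemini-live-model-name", generationConfig = config)
  val session = model.connect() // sends the LiveClientSetupMessage built above
}
```
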
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt
index 2fb74689643..5a04ed9f97c 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/java/LiveSessionFutures.kt
@@ -29,6 +29,7 @@ import com.google.firebase.ai.type.LiveSession
 import com.google.firebase.ai.type.MediaData
 import com.google.firebase.ai.type.PublicPreviewAPI
 import com.google.firebase.ai.type.SessionAlreadyReceivingException
+import com.google.firebase.ai.type.Transcription
 import io.ktor.websocket.close
 import kotlinx.coroutines.reactive.asPublisher
 import org.reactivestreams.Publisher
@@ -41,6 +42,13 @@ import org.reactivestreams.Publisher
 @PublicPreviewAPI
 public abstract class LiveSessionFutures internal constructor() {
 
+  /**
+   * Starts an audio conversation with the model, which can only be stopped using
+   * [stopAudioConversation].
+   */
+  @RequiresPermission(RECORD_AUDIO)
+  public abstract fun startAudioConversation(): ListenableFuture<Unit>
+
   /**
    * Starts an audio conversation with the model, which can only be stopped using
    * [stopAudioConversation] or [close].
@@ -56,9 +64,14 @@ public abstract class LiveSessionFutures internal constructor() {
   /**
    * Starts an audio conversation with the model, which can only be stopped using
    * [stopAudioConversation].
+   *
+   * @param transcriptHandler A callback function that is invoked whenever the model receives a
+   * transcript. The first [Transcription] object is the input transcription, and the second is
+   * the output transcription.
    */
   @RequiresPermission(RECORD_AUDIO)
-  public abstract fun startAudioConversation(): ListenableFuture<Unit>
+  public abstract fun startAudioConversation(
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+  ): ListenableFuture<Unit>
 
   /**
    * Starts an audio conversation with the model, which can only be stopped using
@@ -73,6 +86,26 @@ public abstract class LiveSessionFutures internal constructor() {
   @RequiresPermission(RECORD_AUDIO)
   public abstract fun startAudioConversation(enableInterruptions: Boolean): ListenableFuture<Unit>
 
+  /**
+   * Starts an audio conversation with the model, which can only be stopped using
+   * [stopAudioConversation] or [close].
+   *
+   * @param transcriptHandler A callback function that is invoked whenever the model receives a
+   * transcript. The first [Transcription] object is the input transcription, and the second is
+   * the output transcription.
+   *
+   * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
+   * ongoing reply.
+   *
+   * **WARNING**: The user interruption feature relies on device-specific support, and may not be
+   * consistently available.
+   */
+  @RequiresPermission(RECORD_AUDIO)
+  public abstract fun startAudioConversation(
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+    enableInterruptions: Boolean
+  ): ListenableFuture<Unit>
+
   /**
    * Starts an audio conversation with the model, which can only be stopped using
    * [stopAudioConversation] or [close].
@@ -92,6 +125,30 @@ public abstract class LiveSessionFutures internal constructor() {
     enableInterruptions: Boolean
   ): ListenableFuture<Unit>
 
+  /**
+   * Starts an audio conversation with the model, which can only be stopped using
+   * [stopAudioConversation] or [close].
+   *
+   * @param functionCallHandler A callback function that is invoked whenever the model receives a
+   * function call.
+   *
+   * @param transcriptHandler A callback function that is invoked whenever the model receives a
+   * transcript. The first [Transcription] object is the input transcription, and the second is
+   * the output transcription.
+   *
+   * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
+   * ongoing reply.
+   *
+   * **WARNING**: The user interruption feature relies on device-specific support, and may not be
+   * consistently available.
+   */
+  @RequiresPermission(RECORD_AUDIO)
+  public abstract fun startAudioConversation(
+    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+    enableInterruptions: Boolean
+  ): ListenableFuture<Unit>
+
   /**
    * Stops the audio conversation with the Gemini Server.
    *
@@ -233,6 +290,14 @@ public abstract class LiveSessionFutures internal constructor() {
       functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
     ) = SuspendToFutureAdapter.launchFuture { session.startAudioConversation(functionCallHandler) }
 
+    @RequiresPermission(RECORD_AUDIO)
+    override fun startAudioConversation(
+      transcriptHandler: ((Transcription?, Transcription?) -> Unit)?
+    ) =
+      SuspendToFutureAdapter.launchFuture {
+        session.startAudioConversation(transcriptHandler = transcriptHandler)
+      }
+
     @RequiresPermission(RECORD_AUDIO)
     override fun startAudioConversation() =
       SuspendToFutureAdapter.launchFuture { session.startAudioConversation() }
@@ -243,6 +308,32 @@ public abstract class LiveSessionFutures internal constructor() {
         session.startAudioConversation(enableInterruptions = enableInterruptions)
       }
 
+    @RequiresPermission(RECORD_AUDIO)
+    override fun startAudioConversation(
+      transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+      enableInterruptions: Boolean
+    ) =
+      SuspendToFutureAdapter.launchFuture {
+        session.startAudioConversation(
+          transcriptHandler = transcriptHandler,
+          enableInterruptions = enableInterruptions
+        )
+      }
+
+    @RequiresPermission(RECORD_AUDIO)
+    override fun startAudioConversation(
+      functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
+      transcriptHandler: ((Transcription?, Transcription?) -> Unit)?,
+      enableInterruptions: Boolean
+    ) =
+      SuspendToFutureAdapter.launchFuture {
+        session.startAudioConversation(
+          functionCallHandler = functionCallHandler,
+          transcriptHandler = transcriptHandler,
+          enableInterruptions = enableInterruptions
+        )
+      }
+
     @RequiresPermission(RECORD_AUDIO)
     override fun startAudioConversation(
       functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
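
For Java callers the new overloads slot into the existing futures adapter. A Kotlin sketch of that flow (Kotlin is used for all examples here); LiveSessionFutures.from is assumed to be the existing factory on the companion:

```kotlin
import android.Manifest
import android.util.Log
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.java.LiveSessionFutures
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.Transcription

@RequiresPermission(Manifest.permission.RECORD_AUDIO)
fun startWithTranscripts(session: LiveSession) {
  val futures = LiveSessionFutures.from(session) // assumed existing factory
  // The typed (Transcription?, Transcription?) lambda selects the transcriptHandler overload.
  futures.startAudioConversation(
    { input: Transcription?, output: Transcription? ->
      input?.text?.let { Log.d("Live", "user: $it") }
      output?.text?.let { Log.d("Live", "model: $it") }
    },
    /* enableInterruptions = */ false
  )
}
```
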
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioTranscriptionConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioTranscriptionConfig.kt
new file mode 100644
index 00000000000..406af4d4c6f
--- /dev/null
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/AudioTranscriptionConfig.kt
@@ -0,0 +1,27 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.firebase.ai.type
+
+import kotlinx.serialization.Serializable
+
+/** The audio transcription configuration. Its presence enables audio transcription. */
+public class AudioTranscriptionConfig {
+
+  @Serializable internal object Internal
+
+  internal fun toInternal() = Internal
+}
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveClientSetupMessage.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveClientSetupMessage.kt
index 36e06b184e8..856eebbdde5 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveClientSetupMessage.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveClientSetupMessage.kt
@@ -32,7 +32,9 @@ internal class LiveClientSetupMessage(
   // needs its own config class
   val generationConfig: LiveGenerationConfig.Internal?,
   val tools: List<Tool.Internal>?,
-  val systemInstruction: Content.Internal?
+  val systemInstruction: Content.Internal?,
+  val inputAudioTranscription: AudioTranscriptionConfig.Internal?,
+  val outputAudioTranscription: AudioTranscriptionConfig.Internal?,
 ) {
   @Serializable
   internal class Internal(val setup: LiveClientSetup) {
@@ -41,10 +43,21 @@ internal class LiveClientSetupMessage(
       val model: String,
       val generationConfig: LiveGenerationConfig.Internal?,
       val tools: List<Tool.Internal>?,
-      val systemInstruction: Content.Internal?
+      val systemInstruction: Content.Internal?,
+      val inputAudioTranscription: AudioTranscriptionConfig.Internal?,
+      val outputAudioTranscription: AudioTranscriptionConfig.Internal?,
     )
   }
 
   fun toInternal() =
-    Internal(Internal.LiveClientSetup(model, generationConfig, tools, systemInstruction))
+    Internal(
+      Internal.LiveClientSetup(
+        model,
+        generationConfig,
+        tools,
+        systemInstruction,
+        inputAudioTranscription,
+        outputAudioTranscription
+      )
+    )
 }
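
With the two new nullable fields on LiveClientSetup, an enabled direction serializes as an empty JSON object, since AudioTranscriptionConfig.Internal is a Kotlin object with no properties. A hand-written illustration of the resulting setup frame (not captured from a real session; key spelling simply follows the property names):

```kotlin
// Hand-written illustration of the wire format; not captured from a session.
val exampleSetupFrame =
  """
  {
    "setup": {
      "model": "<model resource name>",
      "inputAudioTranscription": {},
      "outputAudioTranscription": {}
    }
  }
  """.trimIndent()
```
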
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveGenerationConfig.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveGenerationConfig.kt
index eb9dcc716c9..3e014d43162 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveGenerationConfig.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveGenerationConfig.kt
@@ -53,6 +53,11 @@ import kotlinx.serialization.Serializable
  *
  * @property speechConfig Specifies the voice configuration of the audio response from the server.
  *
+ * @property inputAudioTranscription Specifies the configuration for transcribing input audio.
+ *
+ * @property outputAudioTranscription Specifies the configuration for transcribing output audio from
+ * the model.
+ *
  * Refer to the
  * [Control generated output](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal/control-generated-output)
  * guide for more details.
@@ -67,7 +72,9 @@ private constructor(
   internal val presencePenalty: Float?,
   internal val frequencyPenalty: Float?,
   internal val responseModality: ResponseModality?,
-  internal val speechConfig: SpeechConfig?
+  internal val speechConfig: SpeechConfig?,
+  internal val inputAudioTranscription: AudioTranscriptionConfig?,
+  internal val outputAudioTranscription: AudioTranscriptionConfig?,
 ) {
 
   /**
@@ -91,6 +98,10 @@ private constructor(
    * @property responseModality See [LiveGenerationConfig.responseModality]
    *
    * @property speechConfig See [LiveGenerationConfig.speechConfig]
+   *
+   * @property inputAudioTranscription See [LiveGenerationConfig.inputAudioTranscription]
+   *
+   * @property outputAudioTranscription See [LiveGenerationConfig.outputAudioTranscription]
    */
   public class Builder {
     @JvmField public var temperature: Float? = null
@@ -101,6 +112,8 @@ private constructor(
     @JvmField public var frequencyPenalty: Float? = null
     @JvmField public var responseModality: ResponseModality? = null
     @JvmField public var speechConfig: SpeechConfig? = null
+    @JvmField public var inputAudioTranscription: AudioTranscriptionConfig? = null
+    @JvmField public var outputAudioTranscription: AudioTranscriptionConfig? = null
 
     public fun setTemperature(temperature: Float?): Builder = apply {
       this.temperature = temperature
@@ -123,6 +136,14 @@ private constructor(
       this.speechConfig = speechConfig
     }
 
+    public fun setInputAudioTranscription(config: AudioTranscriptionConfig?): Builder = apply {
+      this.inputAudioTranscription = config
+    }
+
+    public fun setOutputAudioTranscription(config: AudioTranscriptionConfig?): Builder = apply {
+      this.outputAudioTranscription = config
+    }
+
     /** Create a new [LiveGenerationConfig] with the attached arguments. */
     public fun build(): LiveGenerationConfig =
       LiveGenerationConfig(
@@ -133,7 +154,9 @@ private constructor(
         presencePenalty = presencePenalty,
         frequencyPenalty = frequencyPenalty,
         speechConfig = speechConfig,
-        responseModality = responseModality
+        responseModality = responseModality,
+        inputAudioTranscription = inputAudioTranscription,
+        outputAudioTranscription = outputAudioTranscription,
       )
   }
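
Because the two settings are independent, either direction can be enabled on its own. For example, transcribing only the model's audio:

```kotlin
import com.google.firebase.ai.type.AudioTranscriptionConfig
import com.google.firebase.ai.type.LiveGenerationConfig

// Only the model's speech is transcribed; user audio is left untranscribed.
val outputOnlyConfig: LiveGenerationConfig =
  LiveGenerationConfig.Builder()
    .setOutputAudioTranscription(AudioTranscriptionConfig())
    .build()
```
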
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveServerMessage.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveServerMessage.kt
index 5cabe593bd6..a250f4a13c9 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveServerMessage.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveServerMessage.kt
@@ -42,7 +42,9 @@ import kotlinx.serialization.json.jsonObject
  * play it out in realtime.
  */
 @PublicPreviewAPI
-public class LiveServerContent(
+public class LiveServerContent
+@Deprecated("This class should not be constructed, only received from the Server")
+public constructor(
   /**
    * The content that the model has generated as part of the current conversation with the user.
    *
@@ -82,25 +84,43 @@ public class LiveServerContent(
    * [interrupted] -> [turnComplete].
    */
   public val generationComplete: Boolean,
+
+  /**
+   * The input transcription. The transcription is independent of the model turn and does not
+   * imply any ordering between the transcription and the model turn.
+   */
+  public val inputTranscription: Transcription?,
+
+  /**
+   * The output transcription. The transcription is independent of the model turn and does not
+   * imply any ordering between the transcription and the model turn.
+   */
+  public val outputTranscription: Transcription?
 ) : LiveServerMessage {
   @OptIn(ExperimentalSerializationApi::class)
   @Serializable
   internal data class Internal(
-    val modelTurn: Content.Internal? = null,
-    val interrupted: Boolean = false,
-    val turnComplete: Boolean = false,
-    val generationComplete: Boolean = false
+    val modelTurn: Content.Internal?,
+    val interrupted: Boolean?,
+    val turnComplete: Boolean?,
+    val generationComplete: Boolean?,
+    val inputTranscription: Transcription.Internal?,
+    val outputTranscription: Transcription.Internal?
   )
 
   @Serializable
   internal data class InternalWrapper(val serverContent: Internal) : InternalLiveServerMessage {
     @OptIn(ExperimentalSerializationApi::class)
-    override fun toPublic() =
-      LiveServerContent(
+    override fun toPublic(): LiveServerContent {
+      // WhenMajor(Revisit the decision to make these have default values)
+      return LiveServerContent(
         serverContent.modelTurn?.toPublic(),
-        serverContent.interrupted,
-        serverContent.turnComplete,
-        serverContent.generationComplete
+        serverContent.interrupted ?: false,
+        serverContent.turnComplete ?: false,
+        serverContent.generationComplete ?: false,
+        serverContent.inputTranscription?.toPublic(),
+        serverContent.outputTranscription?.toPublic()
       )
+    }
   }
 }
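
Clients that drive the session by hand rather than through startAudioConversation can read the same fields off each LiveServerContent message. A sketch over the existing LiveSession.receive() flow; note that either transcription may be null on any given message:

```kotlin
import com.google.firebase.ai.type.LiveServerContent
import com.google.firebase.ai.type.LiveSession
import kotlinx.coroutines.flow.collect

suspend fun logTranscripts(session: LiveSession) {
  session.receive().collect { message ->
    if (message is LiveServerContent) {
      // Transcriptions arrive independently of the model turn.
      message.inputTranscription?.text?.let { println("user: $it") }
      message.outputTranscription?.text?.let { println("model: $it") }
    }
  }
}
```
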
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt
index 0e6796ab01b..9e8b7d7f683 100644
--- a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/LiveSession.kt
@@ -28,7 +28,6 @@ import com.google.firebase.ai.common.JSON
 import com.google.firebase.ai.common.util.CancelledCoroutineScope
 import com.google.firebase.ai.common.util.accumulateUntil
 import com.google.firebase.ai.common.util.childJob
-import com.google.firebase.ai.type.MediaData.Internal
 import com.google.firebase.annotations.concurrent.Blocking
 import io.ktor.client.plugins.websocket.DefaultClientWebSocketSession
 import io.ktor.websocket.Frame
@@ -120,6 +119,37 @@ internal constructor(
     functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null,
     enableInterruptions: Boolean = false,
   ) {
+    startAudioConversation(
+      functionCallHandler = functionCallHandler,
+      transcriptHandler = null,
+      enableInterruptions = enableInterruptions
+    )
+  }
+
+  /**
+   * Starts an audio conversation with the model, which can only be stopped using
+   * [stopAudioConversation] or [close].
+   *
+   * @param functionCallHandler A callback function that is invoked whenever the model receives a
+   * function call. The [FunctionResponsePart] that the callback function returns will be
+   * automatically sent to the model.
+   *
+   * @param transcriptHandler A callback function that is invoked whenever the model receives a
+   * transcript. The first [Transcription] object is the input transcription, and the second is
+   * the output transcription.
+   *
+   * @param enableInterruptions If enabled, allows the user to speak over or interrupt the model's
+   * ongoing reply.
+   *
+   * **WARNING**: The user interruption feature relies on device-specific support, and may not be
+   * consistently available.
+   */
+  @RequiresPermission(RECORD_AUDIO)
+  public suspend fun startAudioConversation(
+    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)? = null,
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)? = null,
+    enableInterruptions: Boolean = false,
+  ) {
     val context = firebaseApp.applicationContext
 
     if (
@@ -142,7 +172,7 @@ internal constructor(
       audioHelper = AudioHelper.build()
 
       recordUserAudio()
-      processModelResponses(functionCallHandler)
+      processModelResponses(functionCallHandler, transcriptHandler)
       listenForModelPlayback(enableInterruptions)
     }
   }
@@ -390,7 +420,8 @@ internal constructor(
    * function call.
    */
   private fun processModelResponses(
    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?
+    functionCallHandler: ((FunctionCallPart) -> FunctionResponsePart)?,
+    transcriptHandler: ((Transcription?, Transcription?) -> Unit)?
   ) {
     receive()
       .onEach {
@@ -419,6 +450,9 @@ internal constructor(
             )
           }
           is LiveServerContent -> {
+            if (it.inputTranscription != null || it.outputTranscription != null) {
+              transcriptHandler?.invoke(it.inputTranscription, it.outputTranscription)
+            }
             if (it.interrupted) {
               playBackQueue.clear()
             } else {
diff --git a/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Transcription.kt b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Transcription.kt
new file mode 100644
index 00000000000..6dc65e5abdb
--- /dev/null
+++ b/firebase-ai/src/main/kotlin/com/google/firebase/ai/type/Transcription.kt
@@ -0,0 +1,33 @@
+/*
+ * Copyright 2025 Google LLC
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.google.firebase.ai.type
+
+import kotlinx.serialization.Serializable
+
+/**
+ * Audio transcription message.
+ * @property text The transcription text.
+ */
+public class Transcription internal constructor(public val text: String?) {
+
+  @Serializable
+  internal data class Internal(val text: String?) {
+    fun toPublic(): Transcription {
+      return Transcription(text)
+    }
+  }
+}
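
End to end, the new suspend overload makes live captions a small addition on top of the audio loop. A sketch, assuming the session's config enabled both transcription directions (the UI hook is hypothetical):

```kotlin
import android.Manifest
import androidx.annotation.RequiresPermission
import com.google.firebase.ai.type.LiveSession
import com.google.firebase.ai.type.Transcription

// Hypothetical UI hook, not part of the SDK.
fun showCaption(speaker: String, text: String) = println("$speaker: $text")

@RequiresPermission(Manifest.permission.RECORD_AUDIO)
suspend fun talkWithCaptions(session: LiveSession) {
  session.startAudioConversation(
    transcriptHandler = { input: Transcription?, output: Transcription? ->
      // Invoked only when at least one side is present (see processModelResponses above).
      input?.text?.let { showCaption("You", it) }
      output?.text?.let { showCaption("Model", it) }
    },
    enableInterruptions = false,
  )
}
```
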