firebase · andrewheard · Aug 9, 2025 · Aug 9, 2025 · Aug 10, 2025 · Aug 10, 2025
diff --git a/FirebaseAI/Sources/FirebaseAI.swift b/FirebaseAI/Sources/FirebaseAI.swift
@@ -130,6 +130,18 @@ public final class FirebaseAI: Sendable {
     )
   }
 
+  /// Creates a `LiveGenerativeModel` for the model identified by `modelName`.
+  ///
+  /// - Parameters:
+  ///   - modelName: The name of the model to use; it is converted to a model resource name with
+  ///     `modelResourceName(modelName:)`.
+  ///   - generationConfig: An optional `LiveGenerationConfig` forwarded to the model; defaults
+  ///     to `nil`.
+  ///   - requestOptions: The `RequestOptions` forwarded to the model; defaults to
+  ///     `RequestOptions()`.
+  /// - Returns: A `LiveGenerativeModel` built with this instance's `firebaseInfo` and `apiConfig`.
+  public func liveModel(modelName: String,
+                        generationConfig: LiveGenerationConfig? = nil,
+                        requestOptions: RequestOptions = RequestOptions()) -> LiveGenerativeModel {
+    let resourceName = modelResourceName(modelName: modelName)
+    let model = LiveGenerativeModel(
+      modelResourceName: resourceName,
+      firebaseInfo: firebaseInfo,
+      apiConfig: apiConfig,
+      generationConfig: generationConfig,
+      requestOptions: requestOptions
+    )
+    return model
+  }
+
   /// Class to enable FirebaseAI to register via the Objective-C based Firebase component system
   /// to include FirebaseAI in the userAgent.
   @objc(FIRVertexAIComponent) class FirebaseVertexAIComponent: NSObject {}

diff --git a/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift b/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift
@@ -0,0 +1,35 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// Describes how the model reacts to user activity that starts while it is responding.
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+public struct ActivityHandling: EncodableProtoEnum, Hashable, Sendable {
+  /// The supported handling modes together with their raw string values.
+  enum Kind: String {
+    case interrupts = "START_OF_ACTIVITY_INTERRUPTS"
+    case noInterrupt = "NO_INTERRUPTION"
+  }
+
+  /// The raw string representation of this `ActivityHandling` value.
+  public let rawValue: String
+
+  /// The start of user activity interrupts the model's response (also called "barge in").
+  ///
+  /// The model's current response is cut off at the moment of the interruption. This is the
+  /// default behavior.
+  public static let interrupts = Self(kind: .interrupts)
+
+  /// User activity does not interrupt the model's response.
+  public static let noInterrupt = Self(kind: .noInterrupt)
+}
diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift
@@ -0,0 +1,107 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// A WebSocket connection backed by a `URLSessionWebSocketTask` that hands received messages to
+/// its consumer through an `AsyncThrowingStream`.
+///
+/// The class is declared `@unchecked Sendable`: its mutable state (`_isConnected` and
+/// `continuationFinished`) is only read or written while holding the matching `NSLock`.
+final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDelegate {
+  private let webSocketTask: URLSessionWebSocketTask
+  private let stream: AsyncThrowingStream<URLSessionWebSocketTask.Message, Error>
+  private let continuation: AsyncThrowingStream<URLSessionWebSocketTask.Message, Error>.Continuation
+
+  /// Whether `continuation` has already been finished; guarded by `continuationLock`.
+  private var continuationFinished = false
+  private let continuationLock = NSLock()
+
+  private var _isConnected = false
+  private let isConnectedLock = NSLock()
+
+  /// Whether `connect()` has been called and `disconnect()` has not been called since.
+  private(set) var isConnected: Bool {
+    get { isConnectedLock.withLock { _isConnected } }
+    set { isConnectedLock.withLock { _isConnected = newValue } }
+  }
+
+  /// Creates the underlying WebSocket task without starting it.
+  ///
+  /// - Parameters:
+  ///   - urlSession: The session used to create the WebSocket task.
+  ///   - urlRequest: The request describing the WebSocket endpoint.
+  init(urlSession: URLSession = GenAIURLSession.default, urlRequest: URLRequest) {
+    webSocketTask = urlSession.webSocketTask(with: urlRequest)
+    (stream, continuation) = AsyncThrowingStream<URLSessionWebSocketTask.Message, Error>
+      .makeStream()
+  }
+
+  deinit {
+    webSocketTask.cancel(with: .goingAway, reason: nil)
+  }
+
+  /// Resumes the WebSocket task, marks the socket as connected and starts the receive loop.
+  ///
+  /// NOTE(review): every call starts another receive loop; this looks like it is meant to be
+  /// called once per instance - confirm against callers.
+  ///
+  /// - Returns: The stream on which received messages are delivered.
+  func connect() -> AsyncThrowingStream<URLSessionWebSocketTask.Message, Error> {
+    webSocketTask.resume()
+    isConnected = true
+    startReceiving()
+    return stream
+  }
+
+  /// Marks the socket as disconnected, finishes the message stream and cancels the task.
+  func disconnect() {
+    isConnected = false
+    // Finish the stream before cancelling the task so that the error the pending `receive()`
+    // throws as a result of the cancellation is not reported to the consumer as a failure.
+    finishStream()
+    webSocketTask.cancel(with: .goingAway, reason: nil)
+  }
+
+  /// Sends `message` over the WebSocket.
+  ///
+  /// - Throws: Any error thrown by `URLSessionWebSocketTask.send(_:)`.
+  func send(_ message: URLSessionWebSocketTask.Message) async throws {
+    // TODO: Throw error if socket already closed
+    try await webSocketTask.send(message)
+  }
+
+  /// Receives messages in an unstructured task and yields them to the stream until the task is
+  /// cancelled, the socket is no longer open, or `disconnect()` has been called.
+  private func startReceiving() {
+    Task {
+      do {
+        while !Task.isCancelled && self.webSocketTask.isOpen && self.isConnected {
+          let message = try await self.webSocketTask.receive()
+          // Yielding after the stream has been finished is ignored by the continuation.
+          self.continuation.yield(message)
+        }
+        // No further messages will be received; make sure consumers are not left waiting.
+        self.finishStream()
+      } catch {
+        // Surface receive failures to the consumer instead of dropping them with the task.
+        self.finishStream(throwing: error)
+      }
+    }
+  }
+
+  /// Finishes the message stream at most once.
+  ///
+  /// - Parameter error: The error to finish the stream with, or `nil` to finish it normally.
+  private func finishStream(throwing error: Error? = nil) {
+    continuationLock.withLock {
+      guard !continuationFinished else { return }
+      continuationFinished = true
+      continuation.finish(throwing: error)
+    }
+  }
+
+  /// `URLSessionWebSocketDelegate` callback for a closed WebSocket; finishes the message stream.
+  ///
+  /// NOTE(review): nothing in this class installs it as a delegate - confirm that the session
+  /// passed to `init` delivers delegate callbacks to this instance.
+  func urlSession(_ session: URLSession,
+                  webSocketTask: URLSessionWebSocketTask,
+                  didCloseWith closeCode: URLSessionWebSocketTask.CloseCode,
+                  reason: Data?) {
+    finishStream()
+  }
+}
+
+private extension URLSessionWebSocketTask {
+  /// Whether the task has no close code recorded yet, i.e. `closeCode` is still `.invalid`.
+  var isOpen: Bool { closeCode == .invalid }
+}
+
+/// An error describing the closure of a WebSocket by its close code and a readable reason.
+struct WebSocketClosedError: Error, Sendable, CustomNSError {
+  /// The close code associated with the closure.
+  let closeCode: URLSessionWebSocketTask.CloseCode
+
+  /// The close reason decoded as UTF-8, or `"Unknown reason."` when no reason could be decoded.
+  let closeReason: String
+
+  /// Creates the error from a close code and the optional raw reason bytes.
+  ///
+  /// - Parameters:
+  ///   - closeCode: The close code associated with the closure.
+  ///   - closeReason: The raw reason bytes; `nil` or bytes that are not valid UTF-8 produce the
+  ///     reason `"Unknown reason."`.
+  init(closeCode: URLSessionWebSocketTask.CloseCode, closeReason: Data?) {
+    self.closeCode = closeCode
+    if let reasonData = closeReason, let reasonText = String(data: reasonData, encoding: .utf8) {
+      self.closeReason = reasonText
+    } else {
+      self.closeReason = "Unknown reason."
+    }
+  }
+
+  /// The raw value of `closeCode`.
+  var errorCode: Int {
+    return closeCode.rawValue
+  }
+
+  /// User info whose localized description states the close code and the close reason.
+  var errorUserInfo: [String: Any] {
+    let description = "WebSocket closed with code \(closeCode.rawValue). Reason: \(closeReason)"
+    return [NSLocalizedDescriptionKey: description]
+  }
+}
diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift
@@ -0,0 +1,35 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// An incremental update to the current conversation, sent by the client.
+///
+/// Everything in this message is appended unconditionally to the conversation history and
+/// becomes part of the prompt the model generates content from.
+///
+/// Sending this message interrupts any model generation that is in progress.
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+struct BidiGenerateContentClientContent: Encodable {
+  /// The content to append to the current conversation with the model.
+  ///
+  /// A single-turn query carries one element. A multi-turn query carries the conversation
+  /// history followed by the latest request.
+  let turns: [ModelContent]?
+
+  /// When `true`, the server should start generating content from the prompt accumulated so
+  /// far. Otherwise the server waits for further messages before it starts generating.
+  let turnComplete: Bool?
+}
diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift
@@ -0,0 +1,55 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// The messages a client can send in the BidiGenerateContent RPC call.
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+enum BidiGenerateContentClientMessage {
+  /// The setup message; it is sent as the first client message and only as the first one.
+  case setup(BidiGenerateContentSetup)
+
+  /// An incremental update to the current conversation, delivered by the client.
+  case clientContent(BidiGenerateContentClientContent)
+
+  /// User input sent in real time.
+  case realtimeInput(BidiGenerateContentRealtimeInput)
+
+  /// The response to a `ToolCallMessage` received from the server.
+  case toolResponse(BidiGenerateContentToolResponse)
+}
+
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+extension BidiGenerateContentClientMessage: Encodable {
+  /// The keys under which the payload of the corresponding case is encoded.
+  enum CodingKeys: CodingKey {
+    case setup, clientContent, realtimeInput, toolResponse
+  }
+
+  /// Encodes the message into a keyed container with a single entry: the payload of the current
+  /// case, stored under the coding key that has the same name as the case.
+  func encode(to encoder: any Encoder) throws {
+    var keyed = encoder.container(keyedBy: CodingKeys.self)
+    switch self {
+    case .setup(let payload):
+      try keyed.encode(payload, forKey: .setup)
+    case .clientContent(let payload):
+      try keyed.encode(payload, forKey: .clientContent)
+    case .realtimeInput(let payload):
+      try keyed.encode(payload, forKey: .realtimeInput)
+    case .toolResponse(let payload):
+      try keyed.encode(payload, forKey: .toolResponse)
+    }
+  }
+}
diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift
@@ -0,0 +1,64 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// User input that is sent in real time.
+///
+/// It differs from `BidiGenerateContentClientContent` in several ways:
+///
+/// - It can be sent continuously without interrupting model generation.
+/// - When data is interleaved across `BidiGenerateContentClientContent` and realtime input, the
+///   server tries to optimize for the best response, but gives no guarantees.
+/// - The end of a turn is not stated explicitly; it is derived from user activity (for example,
+///   the end of speech).
+/// - The data is processed incrementally even before the end of the turn, so that the model can
+///   start responding quickly.
+/// - It is always treated as the user's input and cannot be used to populate the conversation
+///   history.
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+struct BidiGenerateContentRealtimeInput: Encodable {
+  /// Empty marker message signalling the start of user activity.
+  struct ActivityStart: Encodable {}
+
+  /// Empty marker message signalling the end of user activity.
+  struct ActivityEnd: Encodable {}
+
+  /// A chunk of the realtime audio input stream.
+  let audio: InlineData?
+
+  /// Signals that the audio stream has ended, e.g. because the microphone was turned off.
+  ///
+  /// This should only be sent while automatic activity detection is enabled (which is the
+  /// default).
+  ///
+  /// The client can reopen the stream by sending an audio message.
+  let audioStreamEnd: Bool?
+
+  /// A chunk of the realtime video input stream.
+  ///
+  /// NOTE(review): `audio` is typed `InlineData` while this is raw `Data` - confirm that the
+  /// difference is intended.
+  let video: Data?
+
+  /// A chunk of the realtime text input stream.
+  let text: String?
+
+  /// Marks the start of user activity. This can only be sent if automatic (i.e. server-side)
+  /// activity detection is disabled.
+  let activityStart: ActivityStart?
+
+  /// Marks the end of user activity. This can only be sent if automatic (i.e. server-side)
+  /// activity detection is disabled.
+  let activityEnd: ActivityEnd?
+}
diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift
@@ -0,0 +1,53 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+/// An incremental update generated by the model in response to client messages.
+///
+/// Content is generated as quickly as possible rather than in realtime; clients may buffer it
+/// and play it out in realtime.
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+struct BidiGenerateContentServerContent: Decodable {
+  /// The content the model has generated as part of the current conversation with the user.
+  let modelTurn: ModelContent?
+
+  /// When `true`, the model is done generating and will only start again in response to
+  /// additional client messages. It can be set alongside `modelTurn`, in which case that
+  /// content is the last of the turn.
+  let turnComplete: Bool?
+
+  /// When `true`, a client message has interrupted the current model generation. If the client
+  /// is playing out the content in realtime, this is a good signal to stop and empty the
+  /// current playback queue.
+  let interrupted: Bool?
+
+  /// When `true`, the model is done generating.
+  ///
+  /// If the model is interrupted while generating, the interrupted turn carries no
+  /// `generationComplete`; it goes through `interrupted` and then `turnComplete`.
+  ///
+  /// If the model assumes realtime playback, `turnComplete` follows `generationComplete` after
+  /// a delay caused by the model waiting for playback to finish.
+  let generationComplete: Bool?
+
+  /// Metadata describing the sources used to ground the generated content.
+  let groundingMetadata: GroundingMetadata?
+}