From 441b4a7eba975b5c9781d0a755f227a4d02ba7c8 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sat, 9 Aug 2025 12:47:55 -0400 Subject: [PATCH 01/98] [Firebase AI] Add starter types for Live API --- .../Internal/Live/ActivityHandling.swift | 35 +++++++++++ .../BidiGenerateContentClientContent.swift | 34 ++++++++++ .../BidiGenerateContentClientMessage.swift | 30 +++++++++ .../BidiGenerateContentRealtimeInput.swift | 63 +++++++++++++++++++ .../BidiGenerateContentServerContent.swift | 52 +++++++++++++++ .../BidiGenerateContentServerMessage.swift | 45 +++++++++++++ .../Live/BidiGenerateContentSetup.swift | 57 +++++++++++++++++ .../BidiGenerateContentSetupComplete.swift | 18 ++++++ .../Live/BidiGenerateContentToolCall.swift | 22 +++++++ ...iGenerateContentToolCallCancellation.swift | 25 ++++++++ .../BidiGenerateContentToolResponse.swift | 28 +++++++++ .../Types/Internal/Live/EndSensitivity.swift | 33 ++++++++++ .../Sources/Types/Internal/Live/GoAway.swift | 23 +++++++ .../Internal/Live/RealtimeInputConfig.swift | 54 ++++++++++++++++ .../Internal/Live/StartSensitivity.swift | 33 ++++++++++ .../Types/Internal/Live/TurnCoverage.swift | 36 +++++++++++ 16 files changed, 588 insertions(+) create mode 100644 FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift create mode 100644 
FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/GoAway.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift diff --git a/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift b/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift new file mode 100644 index 00000000000..26867a98925 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift @@ -0,0 +1,35 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// The different ways of handling user activity. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct ActivityHandling: EncodableProtoEnum, Hashable, Sendable { + enum Kind: String { + case interrupts = "START_OF_ACTIVITY_INTERRUPTS" + case noInterrupt = "NO_INTERRUPTION" + } + + /// If true, start of activity will interrupt the model's response (also + /// called "barge in"). 
The model's current response will be cut-off in the + /// moment of the interruption. This is the default behavior. + public static let interrupts = ActivityHandling(kind: .interrupts) + + /// The model's response will not be interrupted. + public static let noInterrupt = ActivityHandling(kind: .noInterrupt) + + /// Returns the raw string representation of the `ActivityHandling` value. + public let rawValue: String +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift new file mode 100644 index 00000000000..91fed495ac5 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift @@ -0,0 +1,34 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Incremental update of the current conversation delivered from the client. +/// All the content here is unconditionally appended to the conversation +/// history and used as part of the prompt to the model to generate content. +/// +/// A message here will interrupt any current model generation. +struct BidiGenerateContentClientContent: Encodable { + /// The content appended to the current conversation with the model. + /// + /// For single-turn queries, this is a single instance. For multi-turn + /// queries, this is a repeated field that contains conversation history and + /// latest request. 
+ let turns: [ModelContent]? + + /// If true, indicates that the server content generation should start with + /// the currently accumulated prompt. Otherwise, the server will await + /// additional messages before starting generation. + let turnComplete: Bool? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift new file mode 100644 index 00000000000..88e9ac96896 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -0,0 +1,30 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Messages sent by the client in the BidiGenerateContent RPC call. +enum BidiGenerateContentClientMessage: Encodable { + /// Message to be sent in the first and only first client message. + case setup(BidiGenerateContentSetup) + + /// Incremental update of the current conversation delivered from the client. + case clientContent(BidiGenerateContentClientContent) + + /// User input that is sent in real time. + case realtimeInput(BidiGenerateContentRealtimeInput) + + /// Response to a `ToolCallMessage` received from the server. 
+ case toolResponse(BidiGenerateContentToolResponse) +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift new file mode 100644 index 00000000000..26a9f84d8d7 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift @@ -0,0 +1,63 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// User input that is sent in real time. +/// +/// This is different from `ClientContentUpdate` in a few ways: +/// +/// - Can be sent continuously without interruption to model generation. +/// - If there is a need to mix data interleaved across the +/// `ClientContentUpdate` and the `RealtimeUpdate`, server attempts to +/// optimize for best response, but there are no guarantees. +/// - End of turn is not explicitly specified, but is rather derived from user +/// activity (for example, end of speech). +/// - Even before the end of turn, the data is processed incrementally +/// to optimize for a fast start of the response from the model. +/// - Is always assumed to be the user's input (cannot be used to populate +/// conversation history). +struct BidiGenerateContentRealtimeInput: Encodable { + /// These form the realtime audio input stream. + let audio: Data? + + /// Indicates that the audio stream has ended, e.g. because the microphone was + /// turned off. 
+ /// + /// This should only be sent when automatic activity detection is enabled + /// (which is the default). + /// + /// The client can reopen the stream by sending an audio message. + let audioStreamEnd: Bool? + + /// These form the realtime video input stream. + let video: Data? + + /// These form the realtime text input stream. + let text: String? + + /// Marks the start of user activity. + struct ActivityStart: Encodable {} + + /// Marks the start of user activity. This can only be sent if automatic + /// (i.e. server-side) activity detection is disabled. + let activityStart: ActivityStart? + + /// Marks the end of user activity. + struct ActivityEnd: Encodable {} + + /// Marks the end of user activity. This can only be sent if automatic (i.e. + // server-side) activity detection is disabled. + let activityEnd: ActivityEnd? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift new file mode 100644 index 00000000000..f09ec48a303 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift @@ -0,0 +1,52 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Incremental server update generated by the model in response to client +/// messages. +/// +/// Content is generated as quickly as possible, and not in realtime. 
Clients +/// may choose to buffer and play it out in realtime. +struct BidiGenerateContentServerContent: Decodable { + /// The content that the model has generated as part of the current + /// conversation with the user. + let modelTurn: ModelContent? + + /// If true, indicates that the model is done generating. Generation will only + /// start in response to additional client messages. Can be set alongside + /// `content`, indicating that the `content` is the last in the turn. + let turnComplete: Bool? + + /// If true, indicates that a client message has interrupted current model + /// generation. If the client is playing out the content in realtime, this is a + /// good signal to stop and empty the current queue. If the client is playing + /// out the content in realtime, this is a good signal to stop and empty the + /// current playback queue. + let interrupted: Bool? + + /// If true, indicates that the model is done generating. + /// + /// When model is interrupted while generating there will be no + /// 'generation_complete' message in interrupted turn, it will go through + /// 'interrupted > turn_complete'. + /// + /// When model assumes realtime playback there will be delay between + /// generation_complete and turn_complete that is caused by model waiting for + /// playback to finish. + let generationComplete: Bool? + + /// Metadata specifies sources used to ground generated content. + let groundingMetadata: GroundingMetadata? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift new file mode 100644 index 00000000000..627fa12d771 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -0,0 +1,45 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Response message for BidiGenerateContent RPC call. +struct BidiGenerateContentServerMessage: Decodable { + /// The type of the message. + enum MessageType: Decodable { + /// Sent in response to a `BidiGenerateContentSetup` message from the client. + case setupComplete(BidiGenerateContentSetupComplete) + + /// Content generated by the model in response to client messages. + case serverContent(BidiGenerateContentServerContent) + + /// Request for the client to execute the `function_calls` and return the + /// responses with the matching `id`s. + case toolCall(BidiGenerateContentToolCall) + + /// Notification for the client that a previously issued + /// `ToolCallMessage` with the specified `id`s should have been not executed + /// and should be cancelled. + case toolCallCancellation(BidiGenerateContentToolCallCancellation) + + /// Server will disconnect soon. + case goAway(GoAway) + } + + /// The message type. + let messageType: MessageType + + /// Usage metadata about the response(s). + let usageMetadata: GenerateContentResponse.UsageMetadata? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift new file mode 100644 index 00000000000..ec199a59b41 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -0,0 +1,57 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Message to be sent in the first and only first +/// `BidiGenerateContentClientMessage`. Contains configuration that will apply +/// for the duration of the streaming RPC. +/// +/// Clients should wait for a `BidiGenerateContentSetupComplete` message before +/// sending any additional messages. +struct BidiGenerateContentSetup: Encodable { + /// The fully qualified name of the publisher model. + /// + /// Publisher model format: + /// `projects/{project}/locations/{location}/publishers/*/models/*` + let model: String + + /// Generation config. + /// + /// The following fields aren't supported: + /// + /// - `response_logprobs` + /// - `response_mime_type` + /// - `logprobs` + /// - `response_schema` + /// - `stop_sequence` + /// - `routing_config` + /// - `audio_timestamp` + let generationConfig: GenerationConfig? + + /// The user provided system instructions for the model. + /// Note: only text should be used in parts and content in each part will be + /// in a separate paragraph. + let systemInstruction: ModelContent? + + /// A list of `Tools` the model may use to generate the next response. + /// + /// A `Tool` is a piece of code that enables the system to interact with + /// external systems to perform an action, or set of actions, outside of + /// knowledge and scope of the model. + let tools: [Tool]? + + /// Configures the handling of realtime input. + let realtimeInputConfig: RealtimeInputConfig? 
+} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift new file mode 100644 index 00000000000..a2b02c0caf2 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift @@ -0,0 +1,18 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Sent in response to a `BidiGenerateContentSetup` message from the client. +struct BidiGenerateContentSetupComplete: Decodable {} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift new file mode 100644 index 00000000000..e53decadfab --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift @@ -0,0 +1,22 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Request for the client to execute the `function_calls` and return the +/// responses with the matching `id`s. +struct BidiGenerateContentToolCall: Decodable { + /// The function call to be executed. + let functionCalls: [FunctionCall]? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift new file mode 100644 index 00000000000..fb25fd9f330 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift @@ -0,0 +1,25 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Notification for the client that a previously issued `ToolCallMessage` +/// with the specified `id`s should have been not executed and should be +/// cancelled. If there were side-effects to those tool calls, clients may +/// attempt to undo the tool calls. This message occurs only in cases where the +/// clients interrupt server turns. +struct BidiGenerateContentToolCallCancellation: Decodable { + /// The ids of the tool calls to be cancelled. + let ids: [String]? 
+} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift new file mode 100644 index 00000000000..245f2668a0e --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift @@ -0,0 +1,28 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Client generated response to a `ToolCall` received from the server. +/// Individual `FunctionResponse` objects are matched to the respective +/// `FunctionCall` objects by the `id` field. +/// +/// Note that in the unary and server-streaming GenerateContent APIs function +/// calling happens by exchanging the `Content` parts, while in the bidi +/// GenerateContent APIs function calling happens over these dedicated set of +/// messages. +struct BidiGenerateContentToolResponse: Encodable { + /// The response to the function calls. + let functionResponses: [FunctionResponse]? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift b/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift new file mode 100644 index 00000000000..6caa5d85440 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift @@ -0,0 +1,33 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// End of speech sensitivity. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct EndSensitivity: EncodableProtoEnum, Hashable, Sendable { + enum Kind: String { + case high = "END_SENSITIVITY_HIGH" + case low = "END_SENSITIVITY_LOW" + } + + /// Automatic detection will end speech more often. + public static let high = EndSensitivity(kind: .high) + + /// Automatic detection will end speech less often. + public static let low = EndSensitivity(kind: .low) + + /// Returns the raw string representation of the `EndSensitivity` value. + public let rawValue: String +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift new file mode 100644 index 00000000000..729d86c6cfd --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift @@ -0,0 +1,23 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Server will not be able to service client soon. 
+struct GoAway: Decodable { + /// The remaining time before the connection will be terminated as ABORTED. + /// The minimal time returned here is specified differently together with + /// the rate limits for a given model. + let timeLeft: TimeInterval? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift new file mode 100644 index 00000000000..8ebade9b98b --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift @@ -0,0 +1,54 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Configures the realtime input behavior in `BidiGenerateContent`. +struct RealtimeInputConfig: Encodable { + /// Configures automatic detection of activity. + struct AutomaticActivityDetection: Encodable { + /// If enabled, detected voice and text input count as activity. If + /// disabled, the client must send activity signals. + let disabled: Bool? + + /// Determines how likely speech is to be detected. + let startOfSpeechSensitivity: StartSensitivity? + + /// Determines how likely detected speech is ended. + let endOfSpeechSensitivity: EndSensitivity? + + /// The required duration of detected speech before start-of-speech is + /// committed. The lower this value the more sensitive the start-of-speech + /// detection is and the shorter speech can be recognized. 
However, this + /// also increases the probability of false positives. + let prefixPaddingMS: Int? + + /// The required duration of detected silence (or non-speech) before + // end-of-speech is committed. The larger this value, the longer speech + // gaps can be without interrupting the user's activity but this will + // increase the model's latency. + let silenceDurationMS: Int? + } + + /// If not set, automatic activity detection is enabled by default. If + /// automatic voice detection is disabled, the client must send activity + /// signals. + let automaticActivityDetection: AutomaticActivityDetection? + + /// Defines what effect activity has. + let activityHandling: ActivityHandling? + + /// Defines which input is included in the user's turn. + let turnCoverage: TurnCoverage? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift b/FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift new file mode 100644 index 00000000000..ef0e1fda073 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift @@ -0,0 +1,33 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Start of speech sensitivity. 
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct StartSensitivity: EncodableProtoEnum, Hashable, Sendable { + enum Kind: String { + case high = "START_SENSITIVITY_HIGH" + case low = "START_SENSITIVITY_LOW" + } + + /// Automatic detection will detect the start of speech more often. + public static let high = StartSensitivity(kind: .high) + + /// Automatic detection will detect the start of speech less often. + public static let low = StartSensitivity(kind: .low) + + /// Returns the raw string representation of the `StartSensitivity` value. + public let rawValue: String +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift b/FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift new file mode 100644 index 00000000000..5d69fee78ce --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift @@ -0,0 +1,36 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Options about which input is included in the user's turn. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct TurnCoverage: EncodableProtoEnum, Hashable, Sendable { + enum Kind: String { + case onlyActivity = "TURN_INCLUDES_ONLY_ACTIVITY" + case allInput = "TURN_INCLUDES_ALL_INPUT" + } + + /// The users turn only includes activity since the last turn, excluding + /// inactivity (e.g. silence on the audio stream). 
+ public static let onlyActivity = TurnCoverage(kind: .onlyActivity) + + /// The users turn includes all realtime input since the last turn, including + /// inactivity (e.g. silence on the audio stream). This is the default + // behavior. + public static let allInput = TurnCoverage(kind: .allInput) + + /// Returns the raw string representation of the `TurnCoverage` value. + public let rawValue: String +} From af76cbcba1c2a8a107b748ce8adb7aa117bfd2a4 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sat, 9 Aug 2025 13:09:40 -0400 Subject: [PATCH 02/98] Add placeholder types for `LiveGenerativeModel` and `LiveSession` --- .../Public/Live/LiveGenerativeModel.swift | 41 +++++++++++++++++++ .../Types/Public/Live/LiveSession.swift | 15 +++++++ 2 files changed, 56 insertions(+) create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveSession.swift diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift new file mode 100644 index 00000000000..b93e6bfd4ae --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift @@ -0,0 +1,41 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import Foundation + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public final class LiveGenerativeModel { + let modelResourceName: String + let apiConfig: APIConfig + let requestOptions: RequestOptions + + init(modelResourceName: String, + firebaseInfo: FirebaseInfo, + apiConfig: APIConfig, + requestOptions: RequestOptions, + urlSession: URLSession = GenAIURLSession.default) { + self.modelResourceName = modelResourceName + self.apiConfig = apiConfig + // TODO: Add LiveGenerationConfig + // TODO: Add tools + // TODO: Add tool config + // TODO: Add system instruction + self.requestOptions = requestOptions + } + + public func connect() async throws -> LiveSession { + // TODO: Implement connection + return LiveSession() + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift new file mode 100644 index 00000000000..f2c88d35492 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -0,0 +1,15 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +public final class LiveSession {} From 1b5d7917b65caa1c3daa8daf444293bf98c2b902 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 12:48:30 -0400 Subject: [PATCH 03/98] Fix `BidiGenerateContentClientMessage` encoding --- .../BidiGenerateContentClientMessage.swift | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift index 88e9ac96896..147e986e863 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -15,7 +15,7 @@ import Foundation /// Messages sent by the client in the BidiGenerateContent RPC call. -enum BidiGenerateContentClientMessage: Encodable { +enum BidiGenerateContentClientMessage { /// Message to be sent in the first and only first client message. case setup(BidiGenerateContentSetup) @@ -28,3 +28,26 @@ enum BidiGenerateContentClientMessage: Encodable { /// Response to a `ToolCallMessage` received from the server. 
case toolResponse(BidiGenerateContentToolResponse) } + +extension BidiGenerateContentClientMessage: Encodable { + enum CodingKeys: CodingKey { + case setup + case clientContent + case realtimeInput + case toolResponse + } + + func encode(to encoder: any Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + switch self { + case let .setup(setup): + try container.encode(setup, forKey: .setup) + case let .clientContent(clientContent): + try container.encode(clientContent, forKey: .clientContent) + case let .realtimeInput(realtimeInput): + try container.encode(realtimeInput, forKey: .realtimeInput) + case let .toolResponse(toolResponse): + try container.encode(toolResponse, forKey: .toolResponse) + } + } +} From da8c21bc4e393a13c7e8019aba6a3ab2f2c3c884 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 12:49:03 -0400 Subject: [PATCH 04/98] Fix `BidiGenerateContentServerMessage` decoding --- .../BidiGenerateContentServerMessage.swift | 56 ++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 627fa12d771..cb3b5e0e4e3 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,9 +15,9 @@ import Foundation /// Response message for BidiGenerateContent RPC call. -struct BidiGenerateContentServerMessage: Decodable { +struct BidiGenerateContentServerMessage { /// The type of the message. - enum MessageType: Decodable { + enum MessageType { /// Sent in response to a `BidiGenerateContentSetup` message from the client. case setupComplete(BidiGenerateContentSetupComplete) @@ -43,3 +43,55 @@ struct BidiGenerateContentServerMessage: Decodable { /// Usage metadata about the response(s). 
let usageMetadata: GenerateContentResponse.UsageMetadata? } + +// MARK: - Decodable + +extension BidiGenerateContentServerMessage: Decodable { + enum CodingKeys: String, CodingKey { + case setupComplete + case serverContent + case toolCall + case toolCallCancellation + case goAway + case usageMetadata + } + + init(from decoder: any Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + + if let setupComplete = try container.decodeIfPresent( + BidiGenerateContentSetupComplete.self, + forKey: .setupComplete + ) { + messageType = .setupComplete(setupComplete) + } else if let serverContent = try container.decodeIfPresent( + BidiGenerateContentServerContent.self, + forKey: .serverContent + ) { + messageType = .serverContent(serverContent) + } else if let toolCall = try container.decodeIfPresent( + BidiGenerateContentToolCall.self, + forKey: .toolCall + ) { + messageType = .toolCall(toolCall) + } else if let toolCallCancellation = try container.decodeIfPresent( + BidiGenerateContentToolCallCancellation.self, + forKey: .toolCallCancellation + ) { + messageType = .toolCallCancellation(toolCallCancellation) + } else if let goAway = try container.decodeIfPresent(GoAway.self, forKey: .goAway) { + messageType = .goAway(goAway) + } else { + let context = DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Could not decode server message." 
+ ) + throw DecodingError.dataCorrupted(context) + } + + usageMetadata = try container.decodeIfPresent( + GenerateContentResponse.UsageMetadata.self, + forKey: .usageMetadata + ) + } +} From aa3d148538624aa7948102d6d11b8722bb260c7b Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:17:21 -0400 Subject: [PATCH 05/98] Add `LiveGenerationConfig` and add to setup --- .../Live/BidiGenerateContentSetup.swift | 24 +-- .../Public/Live/LiveGenerationConfig.swift | 155 ++++++++++++++++++ 2 files changed, 168 insertions(+), 11 deletions(-) create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index ec199a59b41..2744950d68f 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -28,17 +28,7 @@ struct BidiGenerateContentSetup: Encodable { let model: String /// Generation config. - /// - /// The following fields aren't supported: - /// - /// - `response_logprobs` - /// - `response_mime_type` - /// - `logprobs` - /// - `response_schema` - /// - `stop_sequence` - /// - `routing_config` - /// - `audio_timestamp` - let generationConfig: GenerationConfig? + let generationConfig: LiveGenerationConfig? /// The user provided system instructions for the model. /// Note: only text should be used in parts and content in each part will be @@ -54,4 +44,16 @@ struct BidiGenerateContentSetup: Encodable { /// Configures the handling of realtime input. let realtimeInputConfig: RealtimeInputConfig? + + init(model: String, + generationConfig: LiveGenerationConfig? = nil, + systemInstruction: ModelContent? = nil, + tools: [Tool]? = nil, + realtimeInputConfig: RealtimeInputConfig? 
= nil) { + self.model = model + self.generationConfig = generationConfig + self.systemInstruction = systemInstruction + self.tools = tools + self.realtimeInputConfig = realtimeInputConfig + } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift new file mode 100644 index 00000000000..ae961d14fb0 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift @@ -0,0 +1,155 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// A struct defining model parameters to be used when sending generative AI +/// requests to the backend model. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct LiveGenerationConfig: Sendable { + /// Controls the degree of randomness in token selection. + let temperature: Float? + + /// Controls diversity of generated text. + let topP: Float? + + /// Limits the number of highest probability words considered. + let topK: Int? + + /// The number of response variations to return. + let candidateCount: Int? + + /// Maximum number of tokens that can be generated in the response. + let maxOutputTokens: Int? + + /// Controls the likelihood of repeating the same words or phrases already generated in the text. + let presencePenalty: Float? + + /// Controls the likelihood of repeating words, with the penalty increasing for each repetition. 
+ let frequencyPenalty: Float? + + /// Supported modalities of the response. + let responseModalities: [ResponseModality]? + + /// Creates a new `LiveGenerationConfig` value. + /// + /// See the + /// [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters) + /// guide and the + /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig) + /// for more details. + /// + /// - Parameters: + /// - temperature: Controls the randomness of the language model's output. Higher values (for + /// example, 1.0) make the text more random and creative, while lower values (for example, + /// 0.1) make it more focused and deterministic. + /// + /// > Note: A temperature of 0 means that the highest probability tokens are always selected. + /// > In this case, responses for a given prompt are mostly deterministic, but a small amount + /// > of variation is still possible. + /// + /// > Important: The range of supported temperature values depends on the model; see the + /// > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#temperature) + /// > for more details. + /// - topP: Controls diversity of generated text. Higher values (e.g., 0.9) produce more diverse + /// text, while lower values (e.g., 0.5) make the output more focused. + /// + /// The supported range is 0.0 to 1.0. + /// + /// > Important: The default `topP` value depends on the model; see the + /// > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-p) + /// > for more details. + /// - topK: Limits the number of highest probability words the model considers when generating + /// text. For example, a topK of 40 means only the 40 most likely words are considered for the + /// next token. A higher value increases diversity, while a lower value makes the output more + /// deterministic. + /// + /// The supported range is 1 to 40. 
+ /// + /// > Important: Support for `topK` and the default value depends on the model; see the + /// [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-k) + /// for more details. + /// - candidateCount: The number of response variations to return; defaults to 1 if not set. + /// Support for multiple candidates depends on the model; see the + /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig) + /// for more details. + /// - maxOutputTokens: Maximum number of tokens that can be generated in the response. + /// See the configure model parameters [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#max-output-tokens) + /// for more details. + /// - presencePenalty: Controls the likelihood of repeating the same words or phrases already + /// generated in the text. Higher values increase the penalty of repetition, resulting in more + /// diverse output. + /// + /// > Note: While both `presencePenalty` and `frequencyPenalty` discourage repetition, + /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase + /// > has already appeared, whereas `frequencyPenalty` increases the penalty for *each* + /// > repetition of a word/phrase. + /// + /// > Important: The range of supported `presencePenalty` values depends on the model; see the + /// > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig) + /// > for more details + /// - frequencyPenalty: Controls the likelihood of repeating words or phrases, with the penalty + /// increasing for each repetition. Higher values increase the penalty of repetition, + /// resulting in more diverse output. 
+ /// + /// > Note: While both `frequencyPenalty` and `presencePenalty` discourage repetition, + /// > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas + /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase + /// > has already appeared. + /// + /// > Important: The range of supported `frequencyPenalty` values depends on the model; see + /// > the + /// > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig) + /// > for more details + /// - responseModalities: The data types (modalities) that may be returned in model responses. + /// + /// See the [multimodal + /// responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation) + /// documentation for more details. + /// + /// > Warning: Specifying response modalities is a **Public Preview** feature, which means + /// > that it is not subject to any SLA or deprecation policy and could change in + /// > backwards-incompatible ways. + public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, + candidateCount: Int? = nil, maxOutputTokens: Int? = nil, + presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, + responseModalities: [ResponseModality]? = nil) { + // Explicit init because otherwise if we re-arrange the above variables it changes the API + // surface. 
+ self.temperature = temperature + self.topP = topP + self.topK = topK + self.candidateCount = candidateCount + self.maxOutputTokens = maxOutputTokens + self.presencePenalty = presencePenalty + self.frequencyPenalty = frequencyPenalty + self.responseModalities = responseModalities + } +} + +// MARK: - Codable Conformances + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +extension LiveGenerationConfig: Encodable { + enum CodingKeys: String, CodingKey { + case temperature + case topP + case topK + case candidateCount + case maxOutputTokens + case presencePenalty + case frequencyPenalty + case responseModalities + } +} From 60003c14f2b5bc9314daa4fe0320e7e5b8a0fad0 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:18:46 -0400 Subject: [PATCH 06/98] Add temporary state machine in `LiveSession` --- .../Public/Live/LiveGenerativeModel.swift | 33 ++- .../Types/Public/Live/LiveSession.swift | 194 +++++++++++++++++- 2 files changed, 223 insertions(+), 4 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift index b93e6bfd4ae..08648fe4e5f 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift @@ -17,25 +17,52 @@ import Foundation @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public final class LiveGenerativeModel { let modelResourceName: String + let firebaseInfo: FirebaseInfo let apiConfig: APIConfig + let generationConfig: LiveGenerationConfig? let requestOptions: RequestOptions + let urlSession: URLSession init(modelResourceName: String, firebaseInfo: FirebaseInfo, apiConfig: APIConfig, + generationConfig: LiveGenerationConfig? 
= nil, requestOptions: RequestOptions, urlSession: URLSession = GenAIURLSession.default) { self.modelResourceName = modelResourceName + self.firebaseInfo = firebaseInfo self.apiConfig = apiConfig - // TODO: Add LiveGenerationConfig + self.generationConfig = generationConfig // TODO: Add tools // TODO: Add tool config // TODO: Add system instruction self.requestOptions = requestOptions + self.urlSession = urlSession } public func connect() async throws -> LiveSession { - // TODO: Implement connection - return LiveSession() + let liveSession = LiveSession( + modelResourceName: modelResourceName, + generationConfig: generationConfig, + url: webSocketURL(), + urlSession: urlSession + ) + print("Opening Live Session...") + try await liveSession.open() + return liveSession + } + + func webSocketURL() -> URL { + let urlString = switch apiConfig.service { + case .vertexAI: + "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/us-central1?key=\(firebaseInfo.apiKey)" + case .googleAI: + "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.GenerativeService/BidiGenerateContent?key=\(firebaseInfo.apiKey)" + } + guard let url = URL(string: urlString) else { + // TODO: Add error handling + fatalError() + } + return url } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index f2c88d35492..fdefd9b5b4e 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -12,4 +12,196 @@ // See the License for the specific language governing permissions and // limitations under the License. -public final class LiveSession {} +import Foundation + +// TODO: Extract most of this file into a service class similar to `GenerativeAIService`. 
+public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessionTaskDelegate { + private enum State { + case notConnected + case connecting + case setupSent + case ready + case closed + } + + private enum WebSocketError: Error { + case connectionClosed + } + + let modelResourceName: String + let generationConfig: LiveGenerationConfig? + let webSocket: URLSessionWebSocketTask + + private var state: State = .notConnected + private var pendingMessages: [(String, CheckedContinuation)] = [] + private let jsonEncoder = JSONEncoder() + private let jsonDecoder = JSONDecoder() + + init(modelResourceName: String, + generationConfig: LiveGenerationConfig?, + url: URL, + urlSession: URLSession) { + self.modelResourceName = modelResourceName + self.generationConfig = generationConfig + webSocket = urlSession.webSocketTask(with: url) + } + + func open() async throws { + guard state == .notConnected else { + print("Web socket is not in a valid state to be opened: \(state)") + return + } + + state = .connecting + webSocket.delegate = self + webSocket.resume() + + print("Opening websocket") + } + + private func failPendingMessages(with error: Error) { + for (_, continuation) in pendingMessages { + continuation.resume(throwing: error) + } + pendingMessages.removeAll() + } + + private func processPendingMessages() { + for (message, continuation) in pendingMessages { + Task { + do { + try await send(message) + continuation.resume() + } catch { + continuation.resume(throwing: error) + } + } + } + pendingMessages.removeAll() + } + + private func send(_ message: String) async throws { + let content = ModelContent(role: "user", parts: [message]) + let clientContent = BidiGenerateContentClientContent(turns: [content], turnComplete: true) + let clientMessage = BidiGenerateContentClientMessage.clientContent(clientContent) + let clientMessageData = try jsonEncoder.encode(clientMessage) + let clientMessageJSON = String(data: clientMessageData, encoding: .utf8) + print("Client 
Message JSON: \(clientMessageJSON)") + try await webSocket.send(.data(clientMessageData)) + setReceiveHandler() + } + + public func sendMessage(_ message: String) async throws { + if state == .ready { + try await send(message) + } else { + try await withCheckedThrowingContinuation { continuation in + pendingMessages.append((message, continuation)) + } + } + } + + public func urlSession(_ session: URLSession, + webSocketTask: URLSessionWebSocketTask, + didOpenWithProtocol protocol: String?) { + print("Web Socket opened.") + + guard state == .connecting else { + print("Web socket is not in a valid state to be opened: \(state)") + return + } + + do { + let setup = BidiGenerateContentSetup( + model: modelResourceName, generationConfig: generationConfig + ) + let message = BidiGenerateContentClientMessage.setup(setup) + let messageData = try jsonEncoder.encode(message) + let messageJSON = String(data: messageData, encoding: .utf8) + print("JSON: \(messageJSON)") + webSocketTask.send(.data(messageData)) { error in + if let error { + print("Send Error: \(error)") + self.state = .closed + self.failPendingMessages(with: error) + return + } + + self.state = .setupSent + self.setReceiveHandler() + } + } catch { + print(error) + state = .closed + failPendingMessages(with: error) + } + } + + public func urlSession(_ session: URLSession, + webSocketTask: URLSessionWebSocketTask, + didCloseWith closeCode: URLSessionWebSocketTask.CloseCode, + reason: Data?) 
{ + print("Web Socket closed.") + state = .closed + failPendingMessages(with: WebSocketError.connectionClosed) + } + + func setReceiveHandler() { + guard state == .setupSent || state == .ready else { + print("Web socket is not in a valid state to receive messages: \(state)") + return + } + + webSocket.receive { result in + do { + let message = try result.get() + switch message { + case let .string(string): + print("Unexpected string response: \(string)") + self.setReceiveHandler() + case let .data(data): + let response = try self.jsonDecoder.decode( + BidiGenerateContentServerMessage.self, + from: data + ) + let responseJSON = String(data: data, encoding: .utf8) + + switch response.messageType { + case .setupComplete: + print("Setup Complete: \(responseJSON)") + self.state = .ready + self.processPendingMessages() + case .serverContent: + // TODO: Return the serverContent to the developer + print("Server Content: \(responseJSON)") + case .toolCall: + // TODO: Tool calls not yet implemented + print("Tool Call: \(responseJSON)") + case .toolCallCancellation: + // TODO: Tool call cancellation not yet implemented + print("Tool Call Cancellation: \(responseJSON)") + case let .goAway(goAway): + if let timeLeft = goAway.timeLeft { + print("Server will disconnect in \(timeLeft) seconds.") + } else { + print("Server will disconnect soon.") + } + } + + if self.state == .closed { + print("Web socket is closed, not listening for more messages.") + } else { + self.setReceiveHandler() + } + @unknown default: + print("Unknown message received") + self.setReceiveHandler() + } + } catch { + // handle the error + print(error) + self.state = .closed + } + } + } +} From 3f285b8db256574d640943b9782515eb51aea38a Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:19:25 -0400 Subject: [PATCH 07/98] Add `liveModel` static method to construct `LiveGenerativeModel` --- FirebaseAI/Sources/FirebaseAI.swift | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git 
a/FirebaseAI/Sources/FirebaseAI.swift b/FirebaseAI/Sources/FirebaseAI.swift index 7f05d8a0d7b..9a701026043 100644 --- a/FirebaseAI/Sources/FirebaseAI.swift +++ b/FirebaseAI/Sources/FirebaseAI.swift @@ -141,6 +141,18 @@ public final class FirebaseAI: Sendable { ) } + public func liveModel(modelName: String, + generationConfig: LiveGenerationConfig? = nil, + requestOptions: RequestOptions = RequestOptions()) -> LiveGenerativeModel { + return LiveGenerativeModel( + modelResourceName: modelResourceName(modelName: modelName), + firebaseInfo: firebaseInfo, + apiConfig: apiConfig, + generationConfig: generationConfig, + requestOptions: requestOptions + ) + } + /// Class to enable FirebaseAI to register via the Objective-C based Firebase component system /// to include FirebaseAI in the userAgent. @objc(FIRVertexAIComponent) class FirebaseVertexAIComponent: NSObject {} From 737d8234438fce5b32c12b5293ec771cd28dbce4 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:57:33 -0400 Subject: [PATCH 08/98] Emit `responses` from `LiveSession` --- .../Live/BidiGenerateContentServerMessage.swift | 6 ++++-- .../Sources/Types/Public/Live/LiveSession.swift | 14 +++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index cb3b5e0e4e3..9270bb5c7c3 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,7 +15,9 @@ import Foundation /// Response message for BidiGenerateContent RPC call. -struct BidiGenerateContentServerMessage { +public struct BidiGenerateContentServerMessage { + // TODO: Make this type `internal` + /// The type of the message. enum MessageType { /// Sent in response to a `BidiGenerateContentSetup` message from the client. 
@@ -56,7 +58,7 @@ extension BidiGenerateContentServerMessage: Decodable { case usageMetadata } - init(from decoder: any Decoder) throws { + public init(from decoder: any Decoder) throws { let container = try decoder.container(keyedBy: CodingKeys.self) if let setupComplete = try container.decodeIfPresent( diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index fdefd9b5b4e..dfab4734ae6 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -32,11 +32,18 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio let generationConfig: LiveGenerationConfig? let webSocket: URLSessionWebSocketTask + // TODO: Refactor this property, potentially returning responses after `connect`. + public let responses: AsyncThrowingStream + private var state: State = .notConnected private var pendingMessages: [(String, CheckedContinuation)] = [] private let jsonEncoder = JSONEncoder() private let jsonDecoder = JSONDecoder() + // TODO: Properly wrap callback code using `withCheckedContinuation` or similar. 
+ private let responseContinuation: AsyncThrowingStream + .Continuation + init(modelResourceName: String, generationConfig: LiveGenerationConfig?, url: URL, @@ -44,6 +51,7 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio self.modelResourceName = modelResourceName self.generationConfig = generationConfig webSocket = urlSession.webSocketTask(with: url) + (responses, responseContinuation) = AsyncThrowingStream.makeStream() } func open() async throws { @@ -64,6 +72,7 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio continuation.resume(throwing: error) } pendingMessages.removeAll() + responseContinuation.finish(throwing: error) } private func processPendingMessages() { @@ -144,6 +153,7 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio print("Web Socket closed.") state = .closed failPendingMessages(with: WebSocketError.connectionClosed) + responseContinuation.finish() } func setReceiveHandler() { @@ -172,7 +182,6 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio self.state = .ready self.processPendingMessages() case .serverContent: - // TODO: Return the serverContent to the developer print("Server Content: \(responseJSON)") case .toolCall: // TODO: Tool calls not yet implemented @@ -188,6 +197,8 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio } } + self.responseContinuation.yield(response) + if self.state == .closed { print("Web socket is closed, not listening for more messages.") } else { @@ -201,6 +212,7 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio // handle the error print(error) self.state = .closed + self.responseContinuation.finish(throwing: error) } } } From 91dd102ec831e82908855f4d2985231fe32474e4 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:59:53 -0400 Subject: [PATCH 09/98] Temporarily display text Bidi responses in TestApp --- 
.../Tests/TestApp/Sources/ContentView.swift | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift index 52af5939455..56631916627 100644 --- a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift +++ b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift @@ -12,17 +12,40 @@ // See the License for the specific language governing permissions and // limitations under the License. +import FirebaseAI import SwiftUI struct ContentView: View { + // TODO: Revert changes in this file. For prototyping purposes only. + let liveModel: LiveGenerativeModel = { + // let firebaseAI = FirebaseAI.firebaseAI(backend: .vertexAI()) + let firebaseAI = FirebaseAI.firebaseAI() + return firebaseAI.liveModel( + modelName: "gemini-2.0-flash-live-001", + generationConfig: LiveGenerationConfig(responseModalities: [.text]) + ) + }() + + @State private var responses: [String] = [] + var body: some View { VStack { - Image(systemName: "globe") - .imageScale(.large) - .foregroundStyle(.tint) - Text("Hello, world!") + List(responses, id: \.self) { + Text($0) + } } .padding() + .task { + do { + let liveSession = try await liveModel.connect() + try await liveSession.sendMessage("Why is the sky blue?") + for try await response in liveSession.responses { + responses.append(String(describing: response)) + } + } catch { + print(error) + } + } } } From dd89e2087cfac95064faab0d26f295348639d804 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 15:51:08 -0400 Subject: [PATCH 10/98] Refactor to use async/await and remove `URLSessionWebSocketDelegate` --- .../BidiGenerateContentServerMessage.swift | 2 +- .../Public/Live/LiveGenerativeModel.swift | 4 +- .../Types/Public/Live/LiveSession.swift | 184 +++--------------- .../Tests/TestApp/Sources/ContentView.swift | 2 +- 4 files changed, 35 insertions(+), 157 deletions(-) diff --git 
a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 9270bb5c7c3..761739afabd 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,7 +15,7 @@ import Foundation /// Response message for BidiGenerateContent RPC call. -public struct BidiGenerateContentServerMessage { +public struct BidiGenerateContentServerMessage: Sendable { // TODO: Make this type `internal` /// The type of the message. diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift index 08648fe4e5f..689e690a631 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift @@ -40,7 +40,7 @@ public final class LiveGenerativeModel { self.urlSession = urlSession } - public func connect() async throws -> LiveSession { + public func connect() -> LiveSession { let liveSession = LiveSession( modelResourceName: modelResourceName, generationConfig: generationConfig, @@ -48,7 +48,7 @@ public final class LiveGenerativeModel { urlSession: urlSession ) print("Opening Live Session...") - try await liveSession.open() + liveSession.openConnection() return liveSession } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index dfab4734ae6..641c93b4b06 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -15,35 +15,18 @@ import Foundation // TODO: Extract most of this file into a service class similar to `GenerativeAIService`. 
-public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessionTaskDelegate { - private enum State { - case notConnected - case connecting - case setupSent - case ready - case closed - } - - private enum WebSocketError: Error { - case connectionClosed - } - +public final class LiveSession: Sendable { let modelResourceName: String let generationConfig: LiveGenerationConfig? let webSocket: URLSessionWebSocketTask - // TODO: Refactor this property, potentially returning responses after `connect`. public let responses: AsyncThrowingStream + private let responseContinuation: AsyncThrowingStream + .Continuation - private var state: State = .notConnected - private var pendingMessages: [(String, CheckedContinuation)] = [] private let jsonEncoder = JSONEncoder() private let jsonDecoder = JSONDecoder() - // TODO: Properly wrap callback code using `withCheckedContinuation` or similar. - private let responseContinuation: AsyncThrowingStream - .Continuation - init(modelResourceName: String, generationConfig: LiveGenerationConfig?, url: URL, @@ -54,166 +37,61 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio (responses, responseContinuation) = AsyncThrowingStream.makeStream() } - func open() async throws { - guard state == .notConnected else { - print("Web socket is not in a valid state to be opened: \(state)") - return - } - - state = .connecting - webSocket.delegate = self - webSocket.resume() - - print("Opening websocket") - } - - private func failPendingMessages(with error: Error) { - for (_, continuation) in pendingMessages { - continuation.resume(throwing: error) - } - pendingMessages.removeAll() - responseContinuation.finish(throwing: error) + deinit { + webSocket.cancel(with: .goingAway, reason: nil) } - private func processPendingMessages() { - for (message, continuation) in pendingMessages { - Task { - do { - try await send(message) - continuation.resume() - } catch { - continuation.resume(throwing: error) - } - } - 
} - pendingMessages.removeAll() - } - - private func send(_ message: String) async throws { + public func sendMessage(_ message: String) async throws { let content = ModelContent(role: "user", parts: [message]) let clientContent = BidiGenerateContentClientContent(turns: [content], turnComplete: true) let clientMessage = BidiGenerateContentClientMessage.clientContent(clientContent) let clientMessageData = try jsonEncoder.encode(clientMessage) - let clientMessageJSON = String(data: clientMessageData, encoding: .utf8) - print("Client Message JSON: \(clientMessageJSON)") try await webSocket.send(.data(clientMessageData)) - setReceiveHandler() } - public func sendMessage(_ message: String) async throws { - if state == .ready { - try await send(message) - } else { - try await withCheckedThrowingContinuation { continuation in - pendingMessages.append((message, continuation)) - } + func openConnection() { + webSocket.resume() + // TODO: Verify that this task gets cancelled on deinit + Task { + await startEventLoop() } } - public func urlSession(_ session: URLSession, - webSocketTask: URLSessionWebSocketTask, - didOpenWithProtocol protocol: String?) 
{ - print("Web Socket opened.") - - guard state == .connecting else { - print("Web socket is not in a valid state to be opened: \(state)") - return + private func startEventLoop() async { + defer { + webSocket.cancel(with: .goingAway, reason: nil) } do { - let setup = BidiGenerateContentSetup( - model: modelResourceName, generationConfig: generationConfig - ) - let message = BidiGenerateContentClientMessage.setup(setup) - let messageData = try jsonEncoder.encode(message) - let messageJSON = String(data: messageData, encoding: .utf8) - print("JSON: \(messageJSON)") - webSocketTask.send(.data(messageData)) { error in - if let error { - print("Send Error: \(error)") - self.state = .closed - self.failPendingMessages(with: error) - return - } - - self.state = .setupSent - self.setReceiveHandler() - } - } catch { - print(error) - state = .closed - failPendingMessages(with: error) - } - } - - public func urlSession(_ session: URLSession, - webSocketTask: URLSessionWebSocketTask, - didCloseWith closeCode: URLSessionWebSocketTask.CloseCode, - reason: Data?) 
{ - print("Web Socket closed.") - state = .closed - failPendingMessages(with: WebSocketError.connectionClosed) - responseContinuation.finish() - } - - func setReceiveHandler() { - guard state == .setupSent || state == .ready else { - print("Web socket is not in a valid state to receive messages: \(state)") - return - } + try await sendSetupMessage() - webSocket.receive { result in - do { - let message = try result.get() + while !Task.isCancelled { + let message = try await webSocket.receive() switch message { case let .string(string): print("Unexpected string response: \(string)") - self.setReceiveHandler() case let .data(data): - let response = try self.jsonDecoder.decode( + let response = try jsonDecoder.decode( BidiGenerateContentServerMessage.self, from: data ) - let responseJSON = String(data: data, encoding: .utf8) - - switch response.messageType { - case .setupComplete: - print("Setup Complete: \(responseJSON)") - self.state = .ready - self.processPendingMessages() - case .serverContent: - print("Server Content: \(responseJSON)") - case .toolCall: - // TODO: Tool calls not yet implemented - print("Tool Call: \(responseJSON)") - case .toolCallCancellation: - // TODO: Tool call cancellation not yet implemented - print("Tool Call Cancellation: \(responseJSON)") - case let .goAway(goAway): - if let timeLeft = goAway.timeLeft { - print("Server will disconnect in \(timeLeft) seconds.") - } else { - print("Server will disconnect soon.") - } - } - - self.responseContinuation.yield(response) - - if self.state == .closed { - print("Web socket is closed, not listening for more messages.") - } else { - self.setReceiveHandler() - } + responseContinuation.yield(response) @unknown default: print("Unknown message received") - self.setReceiveHandler() } - } catch { - // handle the error - print(error) - self.state = .closed - self.responseContinuation.finish(throwing: error) } + } catch { + responseContinuation.finish(throwing: error) } + responseContinuation.finish() + } + 
+ private func sendSetupMessage() async throws { + let setup = BidiGenerateContentSetup( + model: modelResourceName, generationConfig: generationConfig + ) + let message = BidiGenerateContentClientMessage.setup(setup) + let messageData = try jsonEncoder.encode(message) + try await webSocket.send(.data(messageData)) } } diff --git a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift index 56631916627..37ef5fd527a 100644 --- a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift +++ b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift @@ -37,7 +37,7 @@ struct ContentView: View { .padding() .task { do { - let liveSession = try await liveModel.connect() + let liveSession = liveModel.connect() try await liveSession.sendMessage("Why is the sky blue?") for try await response in liveSession.responses { responses.append(String(describing: response)) From 962b57f9f6bc4c53242a0abda411d253ca3b6846 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 15:56:03 -0400 Subject: [PATCH 11/98] Add platform availability annotations --- .../Types/Internal/Live/BidiGenerateContentClientContent.swift | 1 + .../Types/Internal/Live/BidiGenerateContentClientMessage.swift | 2 ++ .../Types/Internal/Live/BidiGenerateContentRealtimeInput.swift | 1 + .../Types/Internal/Live/BidiGenerateContentServerContent.swift | 1 + .../Types/Internal/Live/BidiGenerateContentServerMessage.swift | 1 + .../Sources/Types/Internal/Live/BidiGenerateContentSetup.swift | 1 + .../Types/Internal/Live/BidiGenerateContentSetupComplete.swift | 1 + .../Types/Internal/Live/BidiGenerateContentToolCall.swift | 1 + .../Internal/Live/BidiGenerateContentToolCallCancellation.swift | 1 + .../Types/Internal/Live/BidiGenerateContentToolResponse.swift | 1 + FirebaseAI/Sources/Types/Internal/Live/GoAway.swift | 1 + .../Sources/Types/Internal/Live/RealtimeInputConfig.swift | 1 + FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 1 + 13 files changed, 14 
insertions(+) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift index 91fed495ac5..a24944d83fd 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift @@ -19,6 +19,7 @@ import Foundation /// history and used as part of the prompt to the model to generate content. /// /// A message here will interrupt any current model generation. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentClientContent: Encodable { /// The content appended to the current conversation with the model. /// diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift index 147e986e863..d4e47982af1 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -15,6 +15,7 @@ import Foundation /// Messages sent by the client in the BidiGenerateContent RPC call. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) enum BidiGenerateContentClientMessage { /// Message to be sent in the first and only first client message. 
case setup(BidiGenerateContentSetup) @@ -29,6 +30,7 @@ enum BidiGenerateContentClientMessage { case toolResponse(BidiGenerateContentToolResponse) } +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) extension BidiGenerateContentClientMessage: Encodable { enum CodingKeys: CodingKey { case setup diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift index 26a9f84d8d7..8f57b8875f2 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift @@ -28,6 +28,7 @@ import Foundation /// to optimize for a fast start of the response from the model. /// - Is always assumed to be the user's input (cannot be used to populate /// conversation history). +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentRealtimeInput: Encodable { /// These form the realtime audio input stream. let audio: Data? diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift index f09ec48a303..8d9d1e8940b 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift @@ -19,6 +19,7 @@ import Foundation /// /// Content is generated as quickly as possible, and not in realtime. Clients /// may choose to buffer and play it out in realtime. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentServerContent: Decodable { /// The content that the model has generated as part of the current /// conversation with the user. 
diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 761739afabd..7b0ce692db4 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,6 +15,7 @@ import Foundation /// Response message for BidiGenerateContent RPC call. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct BidiGenerateContentServerMessage: Sendable { // TODO: Make this type `internal` diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index 2744950d68f..5541b7c107a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -20,6 +20,7 @@ import Foundation /// /// Clients should wait for a `BidiGenerateContentSetupComplete` message before /// sending any additional messages. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentSetup: Encodable { /// The fully qualified name of the publisher model. /// diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift index a2b02c0caf2..cbf1dc6d960 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift @@ -15,4 +15,5 @@ import Foundation /// Sent in response to a `BidiGenerateContentSetup` message from the client. 
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentSetupComplete: Decodable {} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift index e53decadfab..86ded221fc3 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift @@ -16,6 +16,7 @@ import Foundation /// Request for the client to execute the `function_calls` and return the /// responses with the matching `id`s. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentToolCall: Decodable { /// The function call to be executed. let functionCalls: [FunctionCall]? diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift index fb25fd9f330..096e8a1a11e 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift @@ -19,6 +19,7 @@ import Foundation /// cancelled. If there were side-effects to those tool calls, clients may /// attempt to undo the tool calls. This message occurs only in cases where the /// clients interrupt server turns. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentToolCallCancellation: Decodable { /// The ids of the tool calls to be cancelled. let ids: [String]? 
diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift index 245f2668a0e..8b4e4ba48b2 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift @@ -22,6 +22,7 @@ import Foundation /// calling happens by exchanging the `Content` parts, while in the bidi /// GenerateContent APIs function calling happens over these dedicated set of /// messages. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentToolResponse: Encodable { /// The response to the function calls. let functionResponses: [FunctionResponse]? diff --git a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift index 729d86c6cfd..45a2a7e944d 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift @@ -15,6 +15,7 @@ import Foundation /// Server will not be able to service client soon. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct GoAway: Decodable { /// The remaining time before the connection will be terminated as ABORTED. /// The minimal time returned here is specified differently together with diff --git a/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift index 8ebade9b98b..08bcfe076f8 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift @@ -15,6 +15,7 @@ import Foundation /// Configures the realtime input behavior in `BidiGenerateContent`. 
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct RealtimeInputConfig: Encodable { /// Configures automatic detection of activity. struct AutomaticActivityDetection: Encodable { diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 641c93b4b06..63542320236 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -15,6 +15,7 @@ import Foundation // TODO: Extract most of this file into a service class similar to `GenerativeAIService`. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public final class LiveSession: Sendable { let modelResourceName: String let generationConfig: LiveGenerationConfig? From 95e19088965b613494b8eb3126a2439f715ac2fb Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Mon, 11 Aug 2025 10:04:45 -0400 Subject: [PATCH 12/98] Add `BidiGenerateContentServerMessage` availability annotation --- .../Types/Internal/Live/BidiGenerateContentServerMessage.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 7b0ce692db4..950819e0343 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -49,6 +49,7 @@ public struct BidiGenerateContentServerMessage: Sendable { // MARK: - Decodable +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) extension BidiGenerateContentServerMessage: Decodable { enum CodingKeys: String, CodingKey { case setupComplete From c6f3ba8f56084ebab96afd8bd3055ec697d84ffb Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Mon, 11 Aug 2025 23:29:46 -0400 Subject: [PATCH 13/98] Add `AsyncWebSocket` wrapper for 
`URLSessionWebSocketTask` --- .../Types/Internal/Live/AsyncWebSocket.swift | 107 ++++++++++++++++++ .../BidiGenerateContentRealtimeInput.swift | 2 +- .../Types/Public/Live/LiveSession.swift | 55 ++++----- 3 files changed, 130 insertions(+), 34 deletions(-) create mode 100644 FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift new file mode 100644 index 00000000000..6a1da33241f --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -0,0 +1,107 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import Foundation + +final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDelegate { + private let webSocketTask: URLSessionWebSocketTask + private let stream: AsyncThrowingStream + private let continuation: AsyncThrowingStream.Continuation + private var continuationFinished = false + private let continuationLock = NSLock() + + private var _isConnected = false + private let isConnectedLock = NSLock() + private(set) var isConnected: Bool { + get { isConnectedLock.withLock { _isConnected } } + set { isConnectedLock.withLock { _isConnected = newValue } } + } + + init(urlSession: URLSession = GenAIURLSession.default, urlRequest: URLRequest) { + webSocketTask = urlSession.webSocketTask(with: urlRequest) + (stream, continuation) = AsyncThrowingStream + .makeStream() + } + + deinit { + webSocketTask.cancel(with: .goingAway, reason: nil) + } + + func connect() -> AsyncThrowingStream { + webSocketTask.resume() + isConnected = true + startReceiving() + return stream + } + + func disconnect() { + webSocketTask.cancel(with: .goingAway, reason: nil) + isConnected = false + continuationLock.withLock { + self.continuation.finish() + self.continuationFinished = true + } + } + + func send(_ message: URLSessionWebSocketTask.Message) async throws { + // TODO: Throw error if socket already closed + try await webSocketTask.send(message) + } + + private func startReceiving() { + Task { + while !Task.isCancelled && self.webSocketTask.isOpen && self.isConnected { + let message = try await webSocketTask.receive() + // TODO: Check continuationFinished before yielding. Use the same thread for NSLock. + continuation.yield(message) + } + } + } + + func urlSession(_ session: URLSession, + webSocketTask: URLSessionWebSocketTask, + didCloseWith closeCode: URLSessionWebSocketTask.CloseCode, + reason: Data?) 
{ + continuationLock.withLock { + guard !continuationFinished else { return } + continuation.finish() + continuationFinished = true + } + } +} + +private extension URLSessionWebSocketTask { + var isOpen: Bool { + return closeCode == .invalid + } +} + +struct WebSocketClosedError: Error, Sendable, CustomNSError { + let closeCode: URLSessionWebSocketTask.CloseCode + let closeReason: String + + init(closeCode: URLSessionWebSocketTask.CloseCode, closeReason: Data?) { + self.closeCode = closeCode + self.closeReason = closeReason + .flatMap { String(data: $0, encoding: .utf8) } ?? "Unknown reason." + } + + var errorCode: Int { closeCode.rawValue } + + var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "WebSocket closed with code \(closeCode.rawValue). Reason: \(closeReason)", + ] + } +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift index 8f57b8875f2..3849b10c561 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift @@ -31,7 +31,7 @@ import Foundation @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentRealtimeInput: Encodable { /// These form the realtime audio input stream. - let audio: Data? + let audio: InlineData? /// Indicates that the audio stream has ended, e.g. because the microphone was /// turned off. diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 63542320236..edf248c440e 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -19,7 +19,7 @@ import Foundation public final class LiveSession: Sendable { let modelResourceName: String let generationConfig: LiveGenerationConfig? 
- let webSocket: URLSessionWebSocketTask + let webSocket: AsyncWebSocket public let responses: AsyncThrowingStream private let responseContinuation: AsyncThrowingStream @@ -34,12 +34,12 @@ public final class LiveSession: Sendable { urlSession: URLSession) { self.modelResourceName = modelResourceName self.generationConfig = generationConfig - webSocket = urlSession.webSocketTask(with: url) + webSocket = AsyncWebSocket(urlSession: urlSession, urlRequest: URLRequest(url: url)) (responses, responseContinuation) = AsyncThrowingStream.makeStream() } deinit { - webSocket.cancel(with: .goingAway, reason: nil) + webSocket.disconnect() } public func sendMessage(_ message: String) async throws { @@ -51,40 +51,29 @@ public final class LiveSession: Sendable { } func openConnection() { - webSocket.resume() - // TODO: Verify that this task gets cancelled on deinit Task { - await startEventLoop() - } - } - - private func startEventLoop() async { - defer { - webSocket.cancel(with: .goingAway, reason: nil) - } - - do { - try await sendSetupMessage() - - while !Task.isCancelled { - let message = try await webSocket.receive() - switch message { - case let .string(string): - print("Unexpected string response: \(string)") - case let .data(data): - let response = try jsonDecoder.decode( - BidiGenerateContentServerMessage.self, - from: data - ) - responseContinuation.yield(response) - @unknown default: - print("Unknown message received") + do { + let stream = webSocket.connect() + try await sendSetupMessage() + for try await message in stream { + switch message { + case let .string(string): + print("Unexpected string response: \(string)") + case let .data(data): + let response = try jsonDecoder.decode( + BidiGenerateContentServerMessage.self, + from: data + ) + responseContinuation.yield(response) + @unknown default: + print("Unknown message received") + } } + } catch { + responseContinuation.finish(throwing: error) } - } catch { - responseContinuation.finish(throwing: error) + 
responseContinuation.finish() } - responseContinuation.finish() } private func sendSetupMessage() async throws { From a9eb3d8280157cbd35773e1a84a21c3dc3d60dd2 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 9 Sep 2025 14:23:36 -0500 Subject: [PATCH 14/98] Update --- FirebaseAI/Sources/AILog.swift | 9 + FirebaseAI/Sources/FirebaseAI.swift | 25 +- FirebaseAI/Sources/GenerativeAIService.swift | 52 +-- .../Sources/Types/Internal/AppCheck.swift | 65 +++ .../Sources/Types/Internal/InternalPart.swift | 4 +- .../Types/Internal/Live/AsyncWebSocket.swift | 55 ++- .../BidiGenerateContentRealtimeInput.swift | 13 +- .../BidiGenerateContentServerContent.swift | 4 +- .../BidiGenerateContentServerMessage.swift | 4 +- .../Live/BidiGenerateContentSetup.swift | 16 +- .../BidiGenerateContentSetupComplete.swift | 2 +- .../Live/BidiGenerateContentToolCall.swift | 2 +- ...iGenerateContentToolCallCancellation.swift | 2 +- .../BidiGenerateContentTranscription.swift | 19 + .../Types/Internal/Live/EndSensitivity.swift | 2 +- .../Sources/Types/Internal/Live/GoAway.swift | 2 +- .../Internal/Live/LiveSessionService.swift | 405 ++++++++++++++++++ .../Public/Live/LiveGenerationConfig.swift | 1 + .../Public/Live/LiveGenerativeModel.swift | 53 +-- .../Types/Public/Live/LiveServerContent.swift | 75 ++++ .../Types/Public/Live/LiveServerGoAway.swift | 31 ++ .../Types/Public/Live/LiveServerMessage.swift | 72 ++++ .../Public/Live/LiveServerToolCall.swift | 32 ++ .../Live/LiveServerToolCallCancellation.swift | 30 ++ .../Types/Public/Live/LiveSession.swift | 156 ++++--- .../Types/Public/Live/LiveTranscript.swift | 25 ++ FirebaseAI/Sources/Types/Public/Part.swift | 14 +- .../Types/Public/ResponseModality.swift | 3 + 28 files changed, 1007 insertions(+), 166 deletions(-) create mode 100644 FirebaseAI/Sources/Types/Internal/AppCheck.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift create mode 100644 
FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveTranscript.swift diff --git a/FirebaseAI/Sources/AILog.swift b/FirebaseAI/Sources/AILog.swift index fe04716384a..ffbfef28a83 100644 --- a/FirebaseAI/Sources/AILog.swift +++ b/FirebaseAI/Sources/AILog.swift @@ -66,12 +66,21 @@ enum AILog { case codeExecutionResultUnrecognizedOutcome = 3015 case executableCodeUnrecognizedLanguage = 3016 case fallbackValueUsed = 3017 + case liveSessionUnsupportedMessage = 3018 + case liveSessionFailedToEncodeClientMessage = 3019 + case liveSessionFailedToEncodeClientMessagePayload = 3020 + case liveSessionFailedToSendClientMessage = 3021 + case liveSessionUnexpectedResponse = 3022 + // SDK State Errors case generateContentResponseNoCandidates = 4000 case generateContentResponseNoText = 4001 case appCheckTokenFetchFailed = 4002 case generateContentResponseEmptyCandidates = 4003 + case invalidWebsocketURL = 4004 + case duplicateLiveSessionSetupComplete = 4005 + // SDK Debugging case loadRequestStreamResponseLine = 5000 diff --git a/FirebaseAI/Sources/FirebaseAI.swift b/FirebaseAI/Sources/FirebaseAI.swift index 9a701026043..fcea43dbab4 100644 --- a/FirebaseAI/Sources/FirebaseAI.swift +++ b/FirebaseAI/Sources/FirebaseAI.swift @@ -141,15 +141,38 @@ public final class FirebaseAI: Sendable { ) } + /// **[Public Preview]** Initializes a ``LiveGenerativeModel`` with the given parameters. 
+ /// + /// > Warning: For Firebase AI SDK, bidirectional streaming using Live models is in Public + /// Preview, which means that the feature is not subject to any SLA or deprecation policy and + /// could change in backwards-incompatible ways. + /// + /// > Important: Only Live models (typically containing `live-*` in the name) are supported. + /// + /// - Parameters: + /// - modelName: The name of the Live model to use, for example + /// `"gemini-live-2.5-flash-preview"`; + /// see [model versions](https://firebase.google.com/docs/ai-logic/live-api?api=dev#models-that-support-capability) + /// for a list of supported Live models. + /// - generationConfig: The content generation parameters your model should use. + /// - tools: A list of ``Tool`` objects that the model may use to generate the next response. + /// - toolConfig: Tool configuration for any ``Tool`` specified in the request. + /// - systemInstruction: Instructions that direct the model to behave a certain way; currently + /// only text content is supported. public func liveModel(modelName: String, + generationConfig: LiveGenerationConfig? = nil, + tools: [Tool]? = nil, + toolConfig: ToolConfig? = nil, + systemInstruction: ModelContent?
= nil, requestOptions: RequestOptions = RequestOptions()) -> LiveGenerativeModel { return LiveGenerativeModel( modelResourceName: modelResourceName(modelName: modelName), firebaseInfo: firebaseInfo, apiConfig: apiConfig, generationConfig: generationConfig, - requestOptions: requestOptions + tools: tools, + toolConfig: toolConfig, + systemInstruction: systemInstruction ) } diff --git a/FirebaseAI/Sources/GenerativeAIService.swift b/FirebaseAI/Sources/GenerativeAIService.swift index 8056d4172b8..a17364f8cb6 100644 --- a/FirebaseAI/Sources/GenerativeAIService.swift +++ b/FirebaseAI/Sources/GenerativeAIService.swift @@ -177,7 +177,10 @@ struct GenerativeAIService { urlRequest.setValue("application/json", forHTTPHeaderField: "Content-Type") if let appCheck = firebaseInfo.appCheck { - let tokenResult = try await fetchAppCheckToken(appCheck: appCheck) + let tokenResult = try await appCheck.fetchAppCheckToken( + limitedUse: firebaseInfo.useLimitedUseAppCheckTokens, + domain: "GenerativeAIService" + ) urlRequest.setValue(tokenResult.token, forHTTPHeaderField: "X-Firebase-AppCheck") if let error = tokenResult.error { AILog.error( @@ -207,53 +210,6 @@ struct GenerativeAIService { return urlRequest } - private func fetchAppCheckToken(appCheck: AppCheckInterop) async throws - -> FIRAppCheckTokenResultInterop { - if firebaseInfo.useLimitedUseAppCheckTokens { - if let token = await getLimitedUseAppCheckToken(appCheck: appCheck) { - return token - } - - let errorMessage = - "The provided App Check token provider doesn't implement getLimitedUseToken(), but requireLimitedUseTokens was enabled." 
- - #if Debug - fatalError(errorMessage) - #else - throw NSError( - domain: "\(Constants.baseErrorDomain).\(Self.self)", - code: AILog.MessageCode.appCheckTokenFetchFailed.rawValue, - userInfo: [NSLocalizedDescriptionKey: errorMessage] - ) - #endif - } - - return await appCheck.getToken(forcingRefresh: false) - } - - private func getLimitedUseAppCheckToken(appCheck: AppCheckInterop) async - -> FIRAppCheckTokenResultInterop? { - // At the moment, `await` doesn’t get along with Objective-C’s optional protocol methods. - await withCheckedContinuation { (continuation: CheckedContinuation< - FIRAppCheckTokenResultInterop?, - Never - >) in - guard - firebaseInfo.useLimitedUseAppCheckTokens, - // `getLimitedUseToken(completion:)` is an optional protocol method. Optional binding - // is performed to make sure `continuation` is called even if the method’s not implemented. - let limitedUseTokenClosure = appCheck.getLimitedUseToken - else { - return continuation.resume(returning: nil) - } - - limitedUseTokenClosure { tokenResult in - // The placeholder token should be used in the case of App Check error. - continuation.resume(returning: tokenResult) - } - } - } - private func httpResponse(urlResponse: URLResponse) throws -> HTTPURLResponse { // The following condition should always be true: "Whenever you make HTTP URL load requests, any // response objects you get back from the URLSession, NSURLConnection, or NSURLDownload class diff --git a/FirebaseAI/Sources/Types/Internal/AppCheck.swift b/FirebaseAI/Sources/Types/Internal/AppCheck.swift new file mode 100644 index 00000000000..9510e5640e6 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/AppCheck.swift @@ -0,0 +1,65 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import FirebaseAppCheckInterop + +// TODO: document +internal extension AppCheckInterop { + // TODO: Document + func fetchAppCheckToken(limitedUse: Bool, + domain: String) async throws -> FIRAppCheckTokenResultInterop { + if limitedUse { + if let token = await getLimitedUseTokenAsync() { + return token + } + + let errorMessage = + "The provided App Check token provider doesn't implement getLimitedUseToken(), but requireLimitedUseTokens was enabled." + + #if DEBUG + fatalError(errorMessage) + #else + throw NSError( + domain: "\(Constants.baseErrorDomain).\(domain)", + code: AILog.MessageCode.appCheckTokenFetchFailed.rawValue, + userInfo: [NSLocalizedDescriptionKey: errorMessage] + ) + #endif + } + + return await getToken(forcingRefresh: false) + } + + private func getLimitedUseTokenAsync() async + -> FIRAppCheckTokenResultInterop? { + // At the moment, `await` doesn’t get along with Objective-C’s optional protocol methods. + await withCheckedContinuation { (continuation: CheckedContinuation< + FIRAppCheckTokenResultInterop?, + Never + >) in + guard + // `getLimitedUseToken(completion:)` is an optional protocol method. Optional binding + // is performed to make sure `continuation` is called even if the method’s not implemented. + let limitedUseTokenClosure = getLimitedUseToken + else { + return continuation.resume(returning: nil) + } + + limitedUseTokenClosure { tokenResult in + // The placeholder token should be used in the case of App Check error.
+ continuation.resume(returning: tokenResult) + } + } + } +} diff --git a/FirebaseAI/Sources/Types/Internal/InternalPart.swift b/FirebaseAI/Sources/Types/Internal/InternalPart.swift index a8afe4439c3..521a75a694d 100644 --- a/FirebaseAI/Sources/Types/Internal/InternalPart.swift +++ b/FirebaseAI/Sources/Types/Internal/InternalPart.swift @@ -56,10 +56,12 @@ struct FunctionCall: Equatable, Sendable { struct FunctionResponse: Codable, Equatable, Sendable { let name: String let response: JSONObject + let id: String? - init(name: String, response: JSONObject) { + init(name: String, response: JSONObject, id: String? = nil) { self.name = name self.response = response + self.id = id } } diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index 6a1da33241f..f64f467347a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -21,11 +21,11 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe private var continuationFinished = false private let continuationLock = NSLock() - private var _isConnected = false - private let isConnectedLock = NSLock() - private(set) var isConnected: Bool { - get { isConnectedLock.withLock { _isConnected } } - set { isConnectedLock.withLock { _isConnected = newValue } } + private var _closeError: WebSocketClosedError? = nil + private let closeErrorLock = NSLock() + private(set) var closeError: WebSocketClosedError? 
{ + get { closeErrorLock.withLock { _closeError } } + set { closeErrorLock.withLock { _closeError = newValue } } } init(urlSession: URLSession = GenAIURLSession.default, urlRequest: URLRequest) { @@ -40,44 +40,55 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe func connect() -> AsyncThrowingStream { webSocketTask.resume() - isConnected = true + closeError = nil startReceiving() return stream } func disconnect() { - webSocketTask.cancel(with: .goingAway, reason: nil) - isConnected = false - continuationLock.withLock { - self.continuation.finish() - self.continuationFinished = true - } + if let closeError { return } + + close(code: .goingAway, reason: nil) } func send(_ message: URLSessionWebSocketTask.Message) async throws { - // TODO: Throw error if socket already closed + if let closeError { + throw closeError + } try await webSocketTask.send(message) } private func startReceiving() { Task { - while !Task.isCancelled && self.webSocketTask.isOpen && self.isConnected { - let message = try await webSocketTask.receive() - // TODO: Check continuationFinished before yielding. Use the same thread for NSLock. - continuation.yield(message) + while !Task.isCancelled && self.webSocketTask.isOpen && self.closeError == nil { + do { + let message = try await webSocketTask.receive() + continuation.yield(message) + } catch { + close(code: webSocketTask.closeCode, reason: webSocketTask.closeReason) + } } } } + private func close(code: URLSessionWebSocketTask.CloseCode, reason: Data?) { + let error = WebSocketClosedError(closeCode: code, closeReason: reason) + closeError = error + + webSocketTask.cancel(with: code, reason: reason) + + continuationLock.withLock { + guard !continuationFinished else { return } + self.continuation.finish(throwing: error) + self.continuationFinished = true + } + } + func urlSession(_ session: URLSession, webSocketTask: URLSessionWebSocketTask, didCloseWith closeCode: URLSessionWebSocketTask.CloseCode, reason: Data?) 
{ - continuationLock.withLock { - guard !continuationFinished else { return } - continuation.finish() - continuationFinished = true - } + close(code: closeCode, reason: reason) } } diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift index 3849b10c561..7566228282e 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift @@ -43,7 +43,7 @@ struct BidiGenerateContentRealtimeInput: Encodable { let audioStreamEnd: Bool? /// These form the realtime video input stream. - let video: Data? + let video: InlineData? /// These form the realtime text input stream. let text: String? @@ -61,4 +61,15 @@ struct BidiGenerateContentRealtimeInput: Encodable { /// Marks the end of user activity. This can only be sent if automatic (i.e. // server-side) activity detection is disabled. let activityEnd: ActivityEnd? + + init(audio: InlineData? = nil, video: InlineData? = nil, text: String? = nil, + activityStart: ActivityStart? = nil, activityEnd: ActivityEnd? = nil, + audioStreamEnd: Bool? = nil) { + self.audio = audio + self.video = video + self.text = text + self.activityStart = activityStart + self.activityEnd = activityEnd + self.audioStreamEnd = audioStreamEnd + } } diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift index 8d9d1e8940b..e52d32be8bf 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift @@ -20,7 +20,7 @@ import Foundation /// Content is generated as quickly as possible, and not in realtime. Clients /// may choose to buffer and play it out in realtime. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -struct BidiGenerateContentServerContent: Decodable { +struct BidiGenerateContentServerContent: Decodable, Sendable { /// The content that the model has generated as part of the current /// conversation with the user. let modelTurn: ModelContent? @@ -50,4 +50,6 @@ struct BidiGenerateContentServerContent: Decodable { /// Metadata specifies sources used to ground generated content. let groundingMetadata: GroundingMetadata? + + let outputTranscription: BidiGenerateContentTranscription? } diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 950819e0343..1b46582a079 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -16,11 +16,11 @@ import Foundation /// Response message for BidiGenerateContent RPC call. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -public struct BidiGenerateContentServerMessage: Sendable { +struct BidiGenerateContentServerMessage: Sendable { // TODO: Make this type `internal` /// The type of the message. - enum MessageType { + enum MessageType: Sendable { /// Sent in response to a `BidiGenerateContentSetup` message from the client. case setupComplete(BidiGenerateContentSetupComplete) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index 5541b7c107a..9f154cb2292 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -43,18 +43,32 @@ struct BidiGenerateContentSetup: Encodable { /// knowledge and scope of the model. let tools: [Tool]? + let toolConfig: ToolConfig? 
+ /// Configures the handling of realtime input. let realtimeInputConfig: RealtimeInputConfig? + let inputAudioTranscription: AudioTranscriptionConfig? + + let outputAudioTranscription: AudioTranscriptionConfig? + init(model: String, generationConfig: LiveGenerationConfig? = nil, systemInstruction: ModelContent? = nil, tools: [Tool]? = nil, - realtimeInputConfig: RealtimeInputConfig? = nil) { + toolConfig: ToolConfig? = nil, + realtimeInputConfig: RealtimeInputConfig? = nil, + inputAudioTranscription: AudioTranscriptionConfig? = nil, + outputAudioTranscription: AudioTranscriptionConfig? = nil) { self.model = model self.generationConfig = generationConfig self.systemInstruction = systemInstruction self.tools = tools + self.toolConfig = toolConfig self.realtimeInputConfig = realtimeInputConfig + self.inputAudioTranscription = inputAudioTranscription + self.outputAudioTranscription = outputAudioTranscription } } + +struct AudioTranscriptionConfig: Encodable {} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift index cbf1dc6d960..acf5a0572fe 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift @@ -16,4 +16,4 @@ import Foundation /// Sent in response to a `BidiGenerateContentSetup` message from the client. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -struct BidiGenerateContentSetupComplete: Decodable {} +struct BidiGenerateContentSetupComplete: Decodable, Sendable {} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift index 86ded221fc3..18ea0336500 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift @@ -17,7 +17,7 @@ import Foundation /// Request for the client to execute the `function_calls` and return the /// responses with the matching `id`s. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -struct BidiGenerateContentToolCall: Decodable { +struct BidiGenerateContentToolCall: Decodable, Sendable { /// The function call to be executed. let functionCalls: [FunctionCall]? } diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift index 096e8a1a11e..5cd3d616c15 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift @@ -20,7 +20,7 @@ import Foundation /// attempt to undo the tool calls. This message occurs only in cases where the /// clients interrupt server turns. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -struct BidiGenerateContentToolCallCancellation: Decodable { +struct BidiGenerateContentToolCallCancellation: Decodable, Sendable { /// The ids of the tool calls to be cancelled. let ids: [String]? 
} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift new file mode 100644 index 00000000000..6ce345538a1 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift @@ -0,0 +1,19 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +struct BidiGenerateContentTranscription: Decodable, Sendable { + let text: String? + let finished: Bool? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift b/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift index 6caa5d85440..bacf814794d 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift @@ -16,7 +16,7 @@ import Foundation /// End of speech sensitivity. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -public struct EndSensitivity: EncodableProtoEnum, Hashable, Sendable { +struct EndSensitivity: EncodableProtoEnum, Hashable, Sendable { enum Kind: String { case high = "END_SENSITIVITY_HIGH" case low = "END_SENSITIVITY_LOW" diff --git a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift index 45a2a7e944d..573f89133f6 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift @@ -16,7 +16,7 @@ import Foundation /// Server will not be able to service client soon. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -struct GoAway: Decodable { +struct GoAway: Decodable, Sendable { /// The remaining time before the connection will be terminated as ABORTED. /// The minimal time returned here is specified differently together with /// the rate limits for a given model. diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift new file mode 100644 index 00000000000..6f1d98e7217 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -0,0 +1,405 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import Foundation + +// TODO: remove @preconcurrency when we update to Swift 6 +// for context, see +// https://forums.swift.org/t/why-does-sending-a-sendable-value-risk-causing-data-races/73074 +@preconcurrency import FirebaseAppCheckInterop +@preconcurrency import FirebaseAuthInterop + +/// Facilitates communication with the backend for a ``LiveSession``. +/// +/// Using an actor will make it easier to adopt session resumption, as we have an isolated place for +/// maintaining mutability, which is backed +/// by Swift concurrency implicitly; allowing us to avoid various edge-case issues with dead-locks +/// and data races. +/// +/// This mainly comes into play when we don't want to block developers from sending messages while a +/// session is being reloaded. +@available(macOS 12.0, *) +actor LiveSessionService { + public let responses: AsyncThrowingStream + private let responseContinuation: AsyncThrowingStream + .Continuation + + // to ensure messages are sent in order, since swift actors are reentrant + private let messageQueue: AsyncStream + private let messageQueueContinuation: AsyncStream.Continuation + + let modelResourceName: String + let generationConfig: LiveGenerationConfig? + let urlSession: URLSession + let apiConfig: APIConfig + let firebaseInfo: FirebaseInfo + let tools: [Tool]? + let toolConfig: ToolConfig? + let systemInstruction: ModelContent? + + var webSocket: AsyncWebSocket? + + private let jsonEncoder = JSONEncoder() + private let jsonDecoder = JSONDecoder() + + private var setupTask: Task + private var responsesTask: Task? + private var messageQueueTask: Task? + + init(modelResourceName: String, + generationConfig: LiveGenerationConfig?, + urlSession: URLSession, + apiConfig: APIConfig, + firebaseInfo: FirebaseInfo, + tools: [Tool]?, + toolConfig: ToolConfig?, + systemInstruction: ModelContent?)
{ + (responses, responseContinuation) = AsyncThrowingStream.makeStream() + (messageQueue, messageQueueContinuation) = AsyncStream.makeStream() + self.modelResourceName = modelResourceName + self.generationConfig = generationConfig + self.urlSession = urlSession + self.apiConfig = apiConfig + self.firebaseInfo = firebaseInfo + self.tools = tools + self.toolConfig = toolConfig + self.systemInstruction = systemInstruction + setupTask = Task {} + } + + deinit { + setupTask.cancel() + responsesTask?.cancel() + messageQueueTask?.cancel() + webSocket?.disconnect() + + webSocket = nil + responsesTask = nil + messageQueueTask = nil + } + + /// Queue a message to be sent to the model. + /// + /// If there are any issues while sending the message, details about the issue will be logged. + /// + /// Since messages are queued synchronously, they are sent in-order. + public func send(_ message: BidiGenerateContentClientMessage) { + messageQueueContinuation.yield(message) + } + + /// Start a new connection to the backend. + /// + /// Separated into its own function to make it easier to surface a way to call it separately when resuming the same session. + public func connect() { + setupTask.cancel() + setupTask = Task { [weak self] in + try await withCheckedThrowingContinuation { setupContinuation in + Task { [weak self] in + guard let self else { return } + await self.listenToServer(setupContinuation) + } + } + } + } + + /// Start a fresh websocket to the backend, and listen for responses. + /// + /// Will hold off on sending any messages until the server sends a setupComplete message. + /// + /// Will also close out the old websocket and the previous long running tasks.
+ private func listenToServer(_ setupComplete: CheckedContinuation) async { + // close out the existing connections, if any + webSocket?.disconnect() + responsesTask?.cancel() + messageQueueTask?.cancel() + + do { + webSocket = try await createWebsocket() + } catch { + let error = LiveSessionSetupError(underlyingError: error) + close() + setupComplete.resume(throwing: error) + return + } + + guard let webSocket else { return } + let stream = webSocket.connect() + + var resumed = false + + // remove the uncommon (and unexpected) responses from the stream, to make normal path cleaner + let dataStream = stream.compactMap { (message: URLSessionWebSocketTask.Message) -> Data? in + switch message { + case let .string(string): + AILog.error(code: .liveSessionUnexpectedResponse, "Unexpected string response: \(string)") + case let .data(data): + return data + @unknown default: + AILog.error(code: .liveSessionUnexpectedResponse, "Unknown message received: \(message)") + } + return nil + } + + do { + let setup = BidiGenerateContentSetup( + model: modelResourceName, + generationConfig: generationConfig, + systemInstruction: systemInstruction, + tools: tools, + toolConfig: toolConfig, + outputAudioTranscription: AudioTranscriptionConfig() + ) + let data = try jsonEncoder.encode(BidiGenerateContentClientMessage.setup(setup)) + try await webSocket.send(.data(data)) + } catch { + let error = LiveSessionSetupError(underlyingError: error) + close() + setupComplete.resume(throwing: error) + return + } + + responsesTask = Task { + do { + for try await message in dataStream { + let response: BidiGenerateContentServerMessage + do { + response = try jsonDecoder.decode( + BidiGenerateContentServerMessage.self, + from: message + ) + } catch { + throw LiveSessionUnsupportedMessageError(underlyingError: error) + } + + if case .setupComplete = response.messageType { + if resumed { + AILog.debug( + code: .duplicateLiveSessionSetupComplete, + "Setup complete was received multiple times; this 
may be a bug in the model." + ) + } else { + // calling resume multiple times is an error in swift, so we catch multiple calls + // to avoid causing any issues due to model quirks + resumed = true + setupComplete.resume() + } + } else if let liveMessage = LiveServerMessage.tryFrom(response) { + responseContinuation.yield(liveMessage) + } else { + // we don't raise an error, since this allows us to add support internally but not publicly + // we still log it in debug though, in case it's not expected + AILog.debug( + code: .liveSessionUnsupportedMessage, + "The server sent a message that we don't currently have a mapping for: \(response)" + ) + } + // TODO: (b/xxx) When we get the goingAway message (and auto session resumption is enabled) then call `connect` again + } + } catch { + if let error = error as? WebSocketClosedError { + // only raise an error if the session didn't close normally (i.e., the user calling close) + if error.closeCode != .goingAway { + let error = LiveSessionUnexpectedClosureError(underlyingError: error) + close() + responseContinuation.finish(throwing: error) + } + } else { + // an error occurred outside the websocket, so it's likely not closed + close() + responseContinuation.finish(throwing: error) + } + } + } + + messageQueueTask = Task { + for await message in messageQueue { + // we don't propagate errors, since those are surfaced in the responses stream + guard let _ = try? await setupTask.value else { + break + } + + let data: Data + do { + data = try jsonEncoder.encode(message) + } catch { + AILog.error(code: .liveSessionFailedToEncodeClientMessage, error.localizedDescription) + AILog.debug( + code: .liveSessionFailedToEncodeClientMessagePayload, + String(describing: message) + ) + continue + } + + do { + try await webSocket.send(.data(data)) + } catch { + AILog.error(code: .liveSessionFailedToSendClientMessage, error.localizedDescription) + } + } + } + } + + /// Cancel any running tasks and close the websocket.
+ /// + /// This method is idempotent; if it's already ran once, it will effectively be a no-op. + private func close() { + setupTask.cancel() + responsesTask?.cancel() + messageQueueTask?.cancel() + webSocket?.disconnect() + + webSocket = nil + responsesTask = nil + messageQueueTask = nil + } + + /// Creates a websocket pointing to the backend. + /// + /// Will apply the required app check and auth headers, as the backend expects them. + private func createWebsocket() async throws -> AsyncWebSocket { + let urlString = switch apiConfig.service { + case .vertexAI: + "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/us-central1" + case .googleAI: + "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.GenerativeService/BidiGenerateContent" + } + guard let url = URL(string: urlString) else { + throw NSError( + domain: "\(Constants.baseErrorDomain).\(Self.self)", + code: AILog.MessageCode.invalidWebsocketURL.rawValue, + userInfo: [ + NSLocalizedDescriptionKey: "The live API websocket URL is not a valid URL", + ] + ) + } + var urlRequest = URLRequest(url: url) + urlRequest.setValue(firebaseInfo.apiKey, forHTTPHeaderField: "x-goog-api-key") + urlRequest.setValue( + "\(GenerativeAIService.languageTag) \(GenerativeAIService.firebaseVersionTag)", + forHTTPHeaderField: "x-goog-api-client" + ) + urlRequest.setValue("application/json", forHTTPHeaderField: "Content-Type") + + if let appCheck = firebaseInfo.appCheck { + let tokenResult = try await appCheck.fetchAppCheckToken( + limitedUse: firebaseInfo.useLimitedUseAppCheckTokens, + domain: "LiveSessionService" + ) + urlRequest.setValue(tokenResult.token, forHTTPHeaderField: "X-Firebase-AppCheck") + if let error = tokenResult.error { + AILog.error( + code: .appCheckTokenFetchFailed, + "Failed to fetch AppCheck token. 
Error: \(error)" + ) + } + } + + if let auth = firebaseInfo.auth, let authToken = try await auth.getToken( + forcingRefresh: false + ) { + urlRequest.setValue("Firebase \(authToken)", forHTTPHeaderField: "Authorization") + } + + if firebaseInfo.app.isDataCollectionDefaultEnabled { + urlRequest.setValue(firebaseInfo.firebaseAppID, forHTTPHeaderField: "X-Firebase-AppId") + if let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String { + urlRequest.setValue(appVersion, forHTTPHeaderField: "X-Firebase-AppVersion") + } + } + + return AsyncWebSocket(urlSession: urlSession, urlRequest: urlRequest) + } +} + +/// The live model sent a message that the SDK failed to parse. +/// +/// This may indicate that the SDK version needs updating, a model is too old for the current SDK +/// version, or that the model is just +/// not supported. +/// +/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +public struct LiveSessionUnsupportedMessageError: Error, Sendable, CustomNSError { + let underlyingError: Error + + init(underlyingError: Error) { + self.underlyingError = underlyingError + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "Failed to parse a live message from the model. Cause: \(underlyingError.localizedDescription)", + NSUnderlyingErrorKey: underlyingError, + ] + } +} + +/// The live session was closed, but not for a reason the SDK expected. +/// +/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +// TODO: two common causes I can think of are api limits and network issues. I wonder if we can catch these somehow, as they seem common enough to surface as actual errors. 
+public struct LiveSessionUnexpectedClosureError: Error, Sendable, CustomNSError { + let underlyingError: WebSocketClosedError + + init(underlyingError: WebSocketClosedError) { + self.underlyingError = underlyingError + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "The live session was closed for some unexpected reason. Cause: \(underlyingError.localizedDescription)", + NSUnderlyingErrorKey: underlyingError, + ] + } +} + +/// The live session exceeded the maximum session duration, and was closed. +/// +/// To learn more, see the docs on [Maximum session duration](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live#maximum-session-duration)\. +// TODO: investigate if the server sends a specific message when this happens, or if we'll need to look at .goingAway and keep an internal note of it +public struct LiveSessionExceededTimeLimit: Error, Sendable, CustomNSError { + let underlyingError: WebSocketClosedError + + init(underlyingError: WebSocketClosedError) { + self.underlyingError = underlyingError + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "The live session exceeded the maximum session duration, and was permanently closed. Start a new session to continue.", + NSUnderlyingErrorKey: underlyingError, + ] + } +} + +/// The live model refused our request to set up a live session. +/// +/// This can occur due to the model not supporting the requested response modalities, the project +/// not having access to the model, +/// the model being invalid, or some internal error. +/// +/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this.
+public struct LiveSessionSetupError: Error, Sendable, CustomNSError { + let underlyingError: Error + + init(underlyingError: Error) { + self.underlyingError = underlyingError + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "The model did not accept the live session request. Reason: \(underlyingError.localizedDescription)", + NSUnderlyingErrorKey: underlyingError, + ] + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift index ae961d14fb0..bb709f28235 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift @@ -14,6 +14,7 @@ import Foundation +// TODO: add support for SpeechConfig /// A struct defining model parameters to be used when sending generative AI /// requests to the backend model. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift index 689e690a631..fcf0bf712ee 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift @@ -14,55 +14,56 @@ import Foundation +/// A multimodal model (like Gemini) capable of real-time content generation based on +/// various input types, supporting bidirectional streaming. +/// +/// You can create a new session via ``connect()``. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public final class LiveGenerativeModel { let modelResourceName: String let firebaseInfo: FirebaseInfo let apiConfig: APIConfig let generationConfig: LiveGenerationConfig? - let requestOptions: RequestOptions + let tools: [Tool]? + let toolConfig: ToolConfig? + let systemInstruction: ModelContent? 
let urlSession: URLSession init(modelResourceName: String, firebaseInfo: FirebaseInfo, apiConfig: APIConfig, generationConfig: LiveGenerationConfig? = nil, - requestOptions: RequestOptions, + tools: [Tool]? = nil, + toolConfig: ToolConfig? = nil, + systemInstruction: ModelContent? = nil, urlSession: URLSession = GenAIURLSession.default) { self.modelResourceName = modelResourceName self.firebaseInfo = firebaseInfo self.apiConfig = apiConfig self.generationConfig = generationConfig - // TODO: Add tools - // TODO: Add tool config - // TODO: Add system instruction - self.requestOptions = requestOptions + self.tools = tools + self.toolConfig = toolConfig + self.systemInstruction = systemInstruction self.urlSession = urlSession } - public func connect() -> LiveSession { - let liveSession = LiveSession( + /// Start a ``LiveSession`` with the server for bidirectional streaming. + /// + /// - Returns: A new ``LiveSession`` that you can use to stream messages to and from the server. + public func connect() async throws -> LiveSession { + let service = LiveSessionService( modelResourceName: modelResourceName, generationConfig: generationConfig, - url: webSocketURL(), - urlSession: urlSession + urlSession: urlSession, + apiConfig: apiConfig, + firebaseInfo: firebaseInfo, + tools: tools, + toolConfig: toolConfig, + systemInstruction: systemInstruction, ) - print("Opening Live Session...") - liveSession.openConnection() - return liveSession - } - func webSocketURL() -> URL { - let urlString = switch apiConfig.service { - case .vertexAI: - "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/us-central1?key=\(firebaseInfo.apiKey)" - case .googleAI: - "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.GenerativeService/BidiGenerateContent?key=\(firebaseInfo.apiKey)" - } - guard let url = URL(string: urlString) else { - // TODO: Add error handling - fatalError() - } - return url + await 
service.connect() + + return LiveSession(service: service) } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift new file mode 100644 index 00000000000..398a99099c2 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -0,0 +1,75 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Incremental server update generated by the model in response to client +/// messages. +/// +/// Content is generated as quickly as possible, and not in realtime. Clients +/// may choose to buffer and play it out in realtime. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct LiveServerContent: Sendable { + let serverContent: BidiGenerateContentServerContent + + /// The content that the model has generated as part of the current + /// conversation with the user. + /// + /// This can be ``null`` if the message signifies something else (such + /// as the turn ending). + public var modelTurn: ModelContent? { serverContent.modelTurn } + + /// The model has finished sending data in the current turn. + /// + /// Generation will only start in response to additional client messages. + /// + /// Can be set alongside ``content``, indicating that the ``content`` is + /// the last in the turn. + public var turnComplete: Bool? 
{ serverContent.turnComplete }
+
+  /// The model was interrupted by a client message while generating data.
+  ///
+  /// If the client is playing out the content in realtime, this is a
+  /// good signal to stop and empty the current queue.
+  public var interrupted: Bool? { serverContent.interrupted }
+
+  /// The model has finished _generating_ data for the current turn.
+  ///
+  /// For realtime playback, there will be a delay between when the model finishes generating
+  /// content
+  /// and the client has finished playing back the generated content. ``generationComplete``
+  /// indicates
+  /// that the model is done generating data, while ``turnComplete`` indicates the model is waiting
+  /// for
+  /// additional client messages. Sending a message during this delay may cause an ``interrupted``
+  /// message to be sent.
+  ///
+  /// Note that if the model was ``interrupted``, this will not be set. The model will go from
+  /// ``interrupted`` -> ``turnComplete``.
+  public var generationComplete: Bool? { serverContent.generationComplete }
+
+  /// Metadata specifying the sources used to ground generated content.
+  public var groundingMetadata: GroundingMetadata? { serverContent.groundingMetadata }
+
+  // TODO: remove
+  public var transcript: LiveTranscript? {
+    if let transcript = serverContent.outputTranscription {
+      LiveTranscript(transcript)
+    } else {
+      nil
+    }
+  }
+
+  init(_ serverContent: BidiGenerateContentServerContent) {
+    self.serverContent = serverContent
+  }
+}
diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift
new file mode 100644
index 00000000000..679c38a63b6
--- /dev/null
+++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift
@@ -0,0 +1,31 @@
+// Copyright 2025 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Server will not be able to service client soon. +/// +/// To learn more about session limits, see the docs on [Maximum session duration](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live#maximum-session-duration)\. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct LiveServerGoAway: Sendable { + let goAway: GoAway + /// The remaining time before the connection will be terminated as ABORTED. + /// The minimal time returned here is specified differently together with + /// the rate limits for a given model. + public var timeLeft: TimeInterval? { goAway.timeLeft } + + init(_ goAway: GoAway) { + self.goAway = goAway + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift new file mode 100644 index 00000000000..59ded46bbea --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift @@ -0,0 +1,72 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/// Update from the server, generated from the model in response to client messages.
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+public struct LiveServerMessage: Sendable {
+  let serverMessage: BidiGenerateContentServerMessage
+
+  /// The type of message sent from the server.
+  public enum MessageType: Sendable {
+    /// Content generated by the model in response to client messages.
+    case content(LiveServerContent)
+
+    /// Request for the client to execute the provided functions.
+    case toolCall(LiveServerToolCall)
+
+    /// Notification for the client that a previously issued ``LiveServerToolCall`` should be cancelled.
+    case toolCallCancellation(LiveServerToolCallCancellation)
+
+    /// Server will disconnect soon.
+    case goAway(LiveServerGoAway)
+  }
+
+  /// The actual message sent from the server.
+  public var messageType: MessageType
+
+  /// Token usage metadata reported by the server for this message, if present.
+  public var usageMetadata: GenerateContentResponse.UsageMetadata? { serverMessage.usageMetadata }
+}
+
+// MARK: - Internal parsing
+
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+internal extension LiveServerMessage {
+  static func tryFrom(_ serverMessage: BidiGenerateContentServerMessage) -> Self? {
+    guard let messageType = LiveServerMessage.MessageType.tryFrom(serverMessage.messageType) else {
+      return nil
+    }
+
+    return LiveServerMessage(serverMessage: serverMessage, messageType: messageType)
+  }
+}
+
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
+internal extension LiveServerMessage.MessageType {
+  static func tryFrom(_ serverMessage: BidiGenerateContentServerMessage.MessageType) -> Self?
{ + return switch serverMessage { + case .setupComplete: + // this is handled internally, and should not be surfaced to users + nil + case let .serverContent(msg): + .content(LiveServerContent(msg)) + case let .toolCall(msg): + .toolCall(LiveServerToolCall(msg)) + case let .toolCallCancellation(msg): + .toolCallCancellation(LiveServerToolCallCancellation(msg)) + case let .goAway(msg): + .goAway(LiveServerGoAway(msg)) + } + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift new file mode 100644 index 00000000000..bba0947e868 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift @@ -0,0 +1,32 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Request for the client to execute the provided ``functionCalls``. +/// +/// The client should return matching ``FunctionResponsePart``, where the `id` fields correspond to +/// individual +/// ``FunctionCallPart``s. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct LiveServerToolCall: Sendable { + let serverToolCall: BidiGenerateContentToolCall + + /// A list of ``FunctionCallPart`` to run and return responses for. + public var functionCalls: [FunctionCallPart]? 
{ + serverToolCall.functionCalls?.map { FunctionCallPart($0) } + } + + init(_ serverToolCall: BidiGenerateContentToolCall) { + self.serverToolCall = serverToolCall + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift new file mode 100644 index 00000000000..25641c410ba --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift @@ -0,0 +1,30 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Notification for the client to cancel a previous function call from ``LiveServerToolCall``. +/// +/// The client does not need to send ``FunctionResponsePart``s for the cancelled +/// ``FunctionCallPart``s. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct LiveServerToolCallCancellation: Sendable { + let serverToolCallCancellation: BidiGenerateContentToolCallCancellation + /// A list of `id`s matching the `id` provided in a previous ``LiveServerToolCall``, where only + /// the provided `id`s should + /// be cancelled. + public var ids: [String]? 
{ serverToolCallCancellation.ids } + + init(_ serverToolCallCancellation: BidiGenerateContentToolCallCancellation) { + self.serverToolCallCancellation = serverToolCallCancellation + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index edf248c440e..b9049db4b8c 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -14,74 +14,116 @@ import Foundation -// TODO: Extract most of this file into a service class similar to `GenerativeAIService`. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +/// A live WebSocket session, capable of streaming content to and from the model. +/// +/// Messages are streamed through ``responses``, and can be sent through either the dedicated +/// realtime API function (such as ``sendAudioRealtime(audio:)`` or ``sendTextRealtime(text:)``), or +/// through the incremental API (such as ``sendContent(_:turnComplete:)``). +/// +/// To create an instance of this class, see ``LiveGenerativeModel``. +@available(macOS 12.0, *) public final class LiveSession: Sendable { - let modelResourceName: String - let generationConfig: LiveGenerationConfig? - let webSocket: AsyncWebSocket + private let service: LiveSessionService - public let responses: AsyncThrowingStream - private let responseContinuation: AsyncThrowingStream - .Continuation + /// An asyncronous stream of messages from the server. + /// + /// These messages from the incremental updates from the model, for the current conversation. 
+ public var responses: AsyncThrowingStream { service.responses } - private let jsonEncoder = JSONEncoder() - private let jsonDecoder = JSONDecoder() + init(service: LiveSessionService) { + self.service = service + } - init(modelResourceName: String, - generationConfig: LiveGenerationConfig?, - url: URL, - urlSession: URLSession) { - self.modelResourceName = modelResourceName - self.generationConfig = generationConfig - webSocket = AsyncWebSocket(urlSession: urlSession, urlRequest: URLRequest(url: url)) - (responses, responseContinuation) = AsyncThrowingStream.makeStream() + /// Response to a ``LiveServerToolCall`` received from the server. + /// + /// - Parameters: + /// - responses: Client generated function results, matched to their respective + /// ``FunctionCallPart`` by the `id` field. + public func functionResponses(_ responses: [FunctionResponsePart]) async { + // TODO: what happens if you send an empty list lol + let message = BidiGenerateContentToolResponse( + functionResponses: responses.map { $0.functionResponse } + ) + await service.send(.toolResponse(message)) } - deinit { - webSocket.disconnect() + /// Sends an audio input stream to the model, using the realtime API. + /// + /// To learn more about audio formats, and the required state they should be provided in, see the + /// docs on + /// [Supported audio formats](https://cloud.google.com/vertex-ai/generative-ai/docs/live-api#supported-audio-formats). + /// + /// - Parameters: + /// - audio: Raw 16-bit PCM audio at 16Hz, used to update the model on the client's + /// conversation. 
+ public func sendAudioRealtime(audio: Data) async { + // TODO: (b/443984790) address when we add RealtimeInputConfig support + let message = BidiGenerateContentRealtimeInput( + audio: InlineData(data: audio, mimeType: "audio/pcm") + ) + await service.send(.realtimeInput(message)) } - public func sendMessage(_ message: String) async throws { - let content = ModelContent(role: "user", parts: [message]) - let clientContent = BidiGenerateContentClientContent(turns: [content], turnComplete: true) - let clientMessage = BidiGenerateContentClientMessage.clientContent(clientContent) - let clientMessageData = try jsonEncoder.encode(clientMessage) - try await webSocket.send(.data(clientMessageData)) + /// Sends a video input stream to the model, using the realtime API. + /// + /// - Parameters: + /// - video: Encoded video data, used to update the model on the client's conversation. + /// - format: The format that the video was encoded in (eg; `mp4`, `webm`, `wmv`, etc.,). + public func sendVideoRealtime(video: Data, format: String = "mp4") async { + let message = BidiGenerateContentRealtimeInput( + video: InlineData(data: video, mimeType: "video/\(format)") + ) + await service.send(.realtimeInput(message)) } - func openConnection() { - Task { - do { - let stream = webSocket.connect() - try await sendSetupMessage() - for try await message in stream { - switch message { - case let .string(string): - print("Unexpected string response: \(string)") - case let .data(data): - let response = try jsonDecoder.decode( - BidiGenerateContentServerMessage.self, - from: data - ) - responseContinuation.yield(response) - @unknown default: - print("Unknown message received") - } - } - } catch { - responseContinuation.finish(throwing: error) - } - responseContinuation.finish() - } + /// Sends a text input stream to the model, using the realtime API. + /// + /// - Parameters: + /// - text: Text content to append to the current client's conversation. 
+ public func sendTextRealtime(text: String) async { + let message = BidiGenerateContentRealtimeInput(text: text) + await service.send(.realtimeInput(message)) } - private func sendSetupMessage() async throws { - let setup = BidiGenerateContentSetup( - model: modelResourceName, generationConfig: generationConfig - ) - let message = BidiGenerateContentClientMessage.setup(setup) - let messageData = try jsonEncoder.encode(message) - try await webSocket.send(.data(messageData)) + /// Incremental update of the current conversation. + /// + /// The content is unconditionally appended to the conversation history and used as part of the + /// prompt to the model to + /// generate content. + /// + /// Sending this message will also cause an interruption, if the server is actively generating + /// content. + /// + /// - Parameters: + /// - content: Content to append to the current conversation with the model. + /// - turnComplete: Whether the server should start generating content with the currently + /// accumulated prompt, or await + /// additional messages before starting generation. By default, the server will await additional + /// messages. + public func sendContent(_ content: [ModelContent], turnComplete: Bool? = nil) async { + let message = BidiGenerateContentClientContent(turns: content, turnComplete: turnComplete) + await service.send(.clientContent(message)) + } + + /// Incremental update of the current conversation. + /// + /// The content is unconditionally appended to the conversation history and used as part of the + /// prompt to the model to + /// generate content. + /// + /// Sending this message will also cause an interruption, if the server is actively generating + /// content. + /// + /// - Parameters: + /// - content: Content to append to the current conversation with the model (see + /// ``PartsRepresentable`` for + /// conforming types). 
+ /// - turnComplete: Whether the server should start generating content with the currently + /// accumulated prompt, or await + /// additional messages before starting generation. By default, the server will await additional + /// messages. + public func sendContent(_ parts: any PartsRepresentable..., + turnComplete: Bool? = nil) async { + await sendContent([ModelContent(parts: parts)], turnComplete: turnComplete) } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveTranscript.swift b/FirebaseAI/Sources/Types/Public/Live/LiveTranscript.swift new file mode 100644 index 00000000000..8545b669976 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveTranscript.swift @@ -0,0 +1,25 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// TODO: remove +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct LiveTranscript: Sendable { + let transcript: BidiGenerateContentTranscription + public var text: String? { transcript.text } + public var finished: Bool? 
{ transcript.finished } + + init(_ transcript: BidiGenerateContentTranscription) { + self.transcript = transcript + } +} diff --git a/FirebaseAI/Sources/Types/Public/Part.swift b/FirebaseAI/Sources/Types/Public/Part.swift index e0015901d61..d97cac6d3b5 100644 --- a/FirebaseAI/Sources/Types/Public/Part.swift +++ b/FirebaseAI/Sources/Types/Public/Part.swift @@ -159,7 +159,7 @@ public struct FunctionCallPart: Part { self.init(FunctionCall(name: name, args: args), isThought: nil, thoughtSignature: nil) } - init(_ functionCall: FunctionCall, isThought: Bool?, thoughtSignature: String?) { + init(_ functionCall: FunctionCall, isThought: Bool? = nil, thoughtSignature: String? = nil) { self.functionCall = functionCall _isThought = isThought self.thoughtSignature = thoughtSignature @@ -177,6 +177,9 @@ public struct FunctionResponsePart: Part { let _isThought: Bool? let thoughtSignature: String? + // TODO: add docs + public var id: String? { functionResponse.id } + /// The name of the function that was called. public var name: String { functionResponse.name } @@ -196,6 +199,15 @@ public struct FunctionResponsePart: Part { ) } + // TODO: add docs for id param + public init(name: String, response: JSONObject, id: String? = nil) { + self.init( + FunctionResponse(name: name, response: response, id: id), + isThought: nil, + thoughtSignature: nil + ) + } + init(_ functionResponse: FunctionResponse, isThought: Bool?, thoughtSignature: String?) 
{ self.functionResponse = functionResponse _isThought = isThought diff --git a/FirebaseAI/Sources/Types/Public/ResponseModality.swift b/FirebaseAI/Sources/Types/Public/ResponseModality.swift index 442fed5f434..743093d7c90 100644 --- a/FirebaseAI/Sources/Types/Public/ResponseModality.swift +++ b/FirebaseAI/Sources/Types/Public/ResponseModality.swift @@ -28,6 +28,7 @@ public struct ResponseModality: EncodableProtoEnum, Sendable { enum Kind: String { case text = "TEXT" case image = "IMAGE" + case audio = "AUDIO" } /// Specifies that the model should generate textual content. @@ -48,5 +49,7 @@ public struct ResponseModality: EncodableProtoEnum, Sendable { /// > backwards-incompatible ways. public static let image = ResponseModality(kind: .image) + public static let audio = ResponseModality(kind: .audio) + let rawValue: String } From b5b0107bf217eaa332de1e204fc45f2dc0167f65 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 9 Sep 2025 16:22:09 -0500 Subject: [PATCH 15/98] Add support for SpeechConfig --- FirebaseAI/Sources/AILog.swift | 2 - .../Sources/Types/Internal/AppCheck.swift | 2 +- .../Internal/Live/LiveSessionService.swift | 7 +- .../Types/Internal/Live/SpeechConfig.swift | 29 ++++++++ .../Types/Internal/Live/VoiceConfig.swift | 66 +++++++++++++++++++ .../Public/Live/LiveGenerationConfig.swift | 11 +++- .../Types/Public/Live/LiveServerMessage.swift | 9 +-- .../Types/Public/Live/LiveSpeechConfig.swift | 46 +++++++++++++ 8 files changed, 160 insertions(+), 12 deletions(-) create mode 100644 FirebaseAI/Sources/Types/Internal/Live/SpeechConfig.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/VoiceConfig.swift create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveSpeechConfig.swift diff --git a/FirebaseAI/Sources/AILog.swift b/FirebaseAI/Sources/AILog.swift index ffbfef28a83..e850abd397b 100644 --- a/FirebaseAI/Sources/AILog.swift +++ b/FirebaseAI/Sources/AILog.swift @@ -72,7 +72,6 @@ enum AILog { case liveSessionFailedToSendClientMessage = 3021 
case liveSessionUnexpectedResponse = 3022 - // SDK State Errors case generateContentResponseNoCandidates = 4000 case generateContentResponseNoText = 4001 @@ -81,7 +80,6 @@ enum AILog { case invalidWebsocketURL = 4004 case duplicateLiveSessionSetupComplete = 4005 - // SDK Debugging case loadRequestStreamResponseLine = 5000 } diff --git a/FirebaseAI/Sources/Types/Internal/AppCheck.swift b/FirebaseAI/Sources/Types/Internal/AppCheck.swift index 9510e5640e6..4da2defd1ba 100644 --- a/FirebaseAI/Sources/Types/Internal/AppCheck.swift +++ b/FirebaseAI/Sources/Types/Internal/AppCheck.swift @@ -15,7 +15,7 @@ import FirebaseAppCheckInterop // TODO: document -internal extension AppCheckInterop { +extension AppCheckInterop { // TODO: Document func fetchAppCheckToken(limitedUse: Bool, domain: String) async throws -> FIRAppCheckTokenResultInterop { diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 6f1d98e7217..d11803d7dac 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -100,7 +100,8 @@ actor LiveSessionService { /// Start a new connection to the backend. /// - /// Seperated into its own function to make it easier to surface a way to call it seperately when resuming the same session. + /// Seperated into its own function to make it easier to surface a way to call it seperately when + /// resuming the same session. 
public func connect() { setupTask.cancel() setupTask = Task { [weak self] in @@ -197,8 +198,8 @@ actor LiveSessionService { } else if let liveMessage = LiveServerMessage.tryFrom(response) { responseContinuation.yield(liveMessage) } else { - // we don't raise an error, since this allows us to add support internally but not publicly - // we still log it in debug though, in case it's not expected + // we don't raise an error, since this allows us to add support internally but not + // publicly we still log it in debug though, in case it's not expected AILog.debug( code: .liveSessionUnsupportedMessage, "The server sent a message that we don't currently have a mapping for: \(response)" diff --git a/FirebaseAI/Sources/Types/Internal/Live/SpeechConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/SpeechConfig.swift new file mode 100644 index 00000000000..c41ea5dfc00 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/SpeechConfig.swift @@ -0,0 +1,29 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Speech generation config. +struct SpeechConfig: Encodable, Sendable { + /// The configuration for the speaker to use. + let voiceConfig: VoiceConfig + + /// Language code (ISO 639. e.g. en-US) for the speech synthesization. + let languageCode: String? + + init(voiceConfig: VoiceConfig, languageCode: String?) 
{ + self.voiceConfig = voiceConfig + self.languageCode = languageCode + } +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/VoiceConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/VoiceConfig.swift new file mode 100644 index 00000000000..98d17a2a906 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/VoiceConfig.swift @@ -0,0 +1,66 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Configuration for the speaker to use. +enum VoiceConfig { + /// Configuration for the prebuilt voice to use. + case prebuiltVoiceConfig(PrebuiltVoiceConfig) + + /// Configuration for the custom voice to use. + case customVoiceConfig(CustomVoiceConfig) +} + +/// The configuration for the prebuilt speaker to use. +/// +/// Not just a string on the parent proto, because there'll likely be a lot +/// more options here. +struct PrebuiltVoiceConfig: Encodable, Sendable { + /// The name of the preset voice to use. + let voiceName: String + + init(voiceName: String) { + self.voiceName = voiceName + } +} + +/// The configuration for the custom voice to use. +struct CustomVoiceConfig: Encodable, Sendable { + /// The sample of the custom voice, in pcm16 s16e format. 
+ let customVoiceSample: Data + + init(customVoiceSample: Data) { + self.customVoiceSample = customVoiceSample + } +} + +// MARK: - Encodable conformance + +extension VoiceConfig: Encodable { + enum CodingKeys: CodingKey { + case prebuiltVoiceConfig + case customVoiceConfig + } + + func encode(to encoder: any Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + switch self { + case let .prebuiltVoiceConfig(setup): + try container.encode(setup, forKey: .prebuiltVoiceConfig) + case let .customVoiceConfig(clientContent): + try container.encode(clientContent, forKey: .customVoiceConfig) + } + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift index bb709f28235..b4395d8e148 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift @@ -14,7 +14,6 @@ import Foundation -// TODO: add support for SpeechConfig /// A struct defining model parameters to be used when sending generative AI /// requests to the backend model. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) @@ -43,6 +42,9 @@ public struct LiveGenerationConfig: Sendable { /// Supported modalities of the response. let responseModalities: [ResponseModality]? + /// Controls the voice of the model during conversation. + let speechConfig: SpeechConfig? + /// Creates a new `GenerationConfig` value. /// /// See the @@ -122,10 +124,13 @@ public struct LiveGenerationConfig: Sendable { /// > Warning: Specifying response modalities is a **Public Preview** feature, which means /// > that it is not subject to any SLA or deprecation policy and could change in /// > backwards-incompatible ways. + /// - speechConfig: Controls the voice of the model, when streaming `audio` via + /// ``ResponseModality``. public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? 
= nil, candidateCount: Int? = nil, maxOutputTokens: Int? = nil, presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, - responseModalities: [ResponseModality]? = nil) { + responseModalities: [ResponseModality]? = nil, + speechConfig: LiveSpeechConfig? = nil) { // Explicit init because otherwise if we re-arrange the above variables it changes the API // surface. self.temperature = temperature @@ -136,6 +141,7 @@ public struct LiveGenerationConfig: Sendable { self.presencePenalty = presencePenalty self.frequencyPenalty = frequencyPenalty self.responseModalities = responseModalities + self.speechConfig = speechConfig?.speechConfig } } @@ -152,5 +158,6 @@ extension LiveGenerationConfig: Encodable { case presencePenalty case frequencyPenalty case responseModalities + case speechConfig } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift index 59ded46bbea..75b9d169c12 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift @@ -25,7 +25,8 @@ public struct LiveServerMessage: Sendable { /// Request for the client to execute the provided functions. case toolCall(LiveServerToolCall) - /// Notification for the client that a previously issued ``LiveServerToolCall`` should be cancelled. + /// Notification for the client that a previously issued ``LiveServerToolCall`` should be + /// cancelled. case toolCallCancellation(LiveServerToolCallCancellation) /// Server will disconnect soon. @@ -35,14 +36,14 @@ public struct LiveServerMessage: Sendable { /// The actual message sent from the server. public var messageType: MessageType - /// + // TODO: document public var usageMetadata: GenerateContentResponse.UsageMetadata? 
{ serverMessage.usageMetadata } } // MARK: - Internal parsing @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -internal extension LiveServerMessage { +extension LiveServerMessage { static func tryFrom(_ serverMessage: BidiGenerateContentServerMessage) -> Self? { guard let messageType = LiveServerMessage.MessageType.tryFrom(serverMessage.messageType) else { return nil @@ -53,7 +54,7 @@ internal extension LiveServerMessage { } @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -internal extension LiveServerMessage.MessageType { +extension LiveServerMessage.MessageType { static func tryFrom(_ serverMessage: BidiGenerateContentServerMessage.MessageType) -> Self? { return switch serverMessage { case .setupComplete: diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSpeechConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSpeechConfig.swift new file mode 100644 index 00000000000..7c637bfe85e --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSpeechConfig.swift @@ -0,0 +1,46 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Configuration for controlling the voice of the model during conversation. 
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct LiveSpeechConfig: Sendable { + let speechConfig: SpeechConfig + + init(_ speechConfig: SpeechConfig) { + self.speechConfig = speechConfig + } + + /// Creates a new `LiveSpeechConfig` value. + /// + /// - Parameters: + /// - voiceName: The name of the prebuilt voice to be used for the model's speech response. + /// + /// To learn more about the available voices, see the docs on + /// [Voice options](https://ai.google.dev/gemini-api/docs/speech-generation#voices)\. + /// - languageCode: ISO-639 language code to use when parsing text sent from the client, instead + /// of audio. By default, the model will attempt to detect the input language automatically. + /// + /// To learn which codes are supported, see the docs on + /// [Supported languages](https://ai.google.dev/gemini-api/docs/speech-generation#languages)\. + public init(voiceName: String, languageCode: String? = nil) { + self.init( + SpeechConfig( + voiceConfig: .prebuiltVoiceConfig(.init(voiceName: voiceName)), + languageCode: languageCode + ) + ) + } +} From 9b83676301334d6935ab73f0fc8a9b1926a5e8ba Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 9 Sep 2025 16:26:38 -0500 Subject: [PATCH 16/98] Minor doc fixes --- .../Sources/Types/Public/Live/LiveGenerationConfig.swift | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift index b4395d8e148..26e9b2bb951 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift @@ -14,8 +14,7 @@ import Foundation -/// A struct defining model parameters to be used when sending generative AI -/// requests to the backend model. +/// Configuration options for live content generation. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct LiveGenerationConfig: Sendable { /// Controls the degree of randomness in token selection. @@ -45,7 +44,7 @@ public struct LiveGenerationConfig: Sendable { /// Controls the voice of the model during conversation. let speechConfig: SpeechConfig? - /// Creates a new `GenerationConfig` value. + /// Creates a new ``LiveGenerationConfig`` value. /// /// See the /// [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters) From d5e375ff99e0059262b20f417fd7ee47b4b31a9a Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 9 Sep 2025 16:38:55 -0500 Subject: [PATCH 17/98] Add missing ID field --- .../Sources/Types/Internal/InternalPart.swift | 5 ++++- FirebaseAI/Sources/Types/Public/Part.swift | 17 ++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/InternalPart.swift b/FirebaseAI/Sources/Types/Internal/InternalPart.swift index 521a75a694d..44abee6baeb 100644 --- a/FirebaseAI/Sources/Types/Internal/InternalPart.swift +++ b/FirebaseAI/Sources/Types/Internal/InternalPart.swift @@ -45,10 +45,12 @@ struct FileData: Codable, Equatable, Sendable { struct FunctionCall: Equatable, Sendable { let name: String let args: JSONObject + let id: String? - init(name: String, args: JSONObject) { + init(name: String, args: JSONObject, id: String?) { self.name = name self.args = args + self.id = id } } @@ -137,6 +139,7 @@ extension FunctionCall: Codable { } else { args = JSONObject() } + id = try container.decode(String.self, forKey: .id) } } diff --git a/FirebaseAI/Sources/Types/Public/Part.swift b/FirebaseAI/Sources/Types/Public/Part.swift index d97cac6d3b5..1b26b50049a 100644 --- a/FirebaseAI/Sources/Types/Public/Part.swift +++ b/FirebaseAI/Sources/Types/Public/Part.swift @@ -147,6 +147,7 @@ public struct FunctionCallPart: Part { public var isThought: Bool { _isThought ?? false } + public var id: String? 
/// Constructs a new function call part. /// /// > Note: A `FunctionCallPart` is typically received from the model, rather than created @@ -156,7 +157,21 @@ public struct FunctionCallPart: Part { /// - name: The name of the function to call. /// - args: The function parameters and values. public init(name: String, args: JSONObject) { - self.init(FunctionCall(name: name, args: args), isThought: nil, thoughtSignature: nil) + self.init(FunctionCall(name: name, args: args, id: nil), isThought: nil, thoughtSignature: nil) + } + + /// Constructs a new function call part. + /// + /// > Note: A `FunctionCallPart` is typically received from the model, rather than created + /// manually. + /// + /// - Parameters: + /// - name: The name of the function to call. + /// - args: The function parameters and values. + /// - id: Unique id of the function call. If present, the returned ``FunctionResponsePart`` + /// should have a matching `id` field. + public init(name: String, args: JSONObject, id: String? = nil) { + self.init(FunctionCall(name: name, args: args, id: id), isThought: nil, thoughtSignature: nil) } init(_ functionCall: FunctionCall, isThought: Bool? = nil, thoughtSignature: String? = nil) { From 543869e590a2edd34fd1bb108c871e0a8b845dd0 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 9 Sep 2025 16:39:03 -0500 Subject: [PATCH 18/98] Remove old todo comment --- .../Types/Internal/Live/BidiGenerateContentServerMessage.swift | 2 -- 1 file changed, 2 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 1b46582a079..0147ecf34dd 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -17,8 +17,6 @@ import Foundation /// Response message for BidiGenerateContent RPC call. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentServerMessage: Sendable { - // TODO: Make this type `internal` - /// The type of the message. enum MessageType: Sendable { /// Sent in response to a `BidiGenerateContentSetup` message from the client. From d342829edd6d9408be87fe77b98a482ee5db7ce2 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 9 Sep 2025 16:39:11 -0500 Subject: [PATCH 19/98] Add missing docs --- .../Types/Public/Live/LiveServerMessage.swift | 2 +- FirebaseAI/Sources/Types/Public/Part.swift | 12 ++++++++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift index 75b9d169c12..50cef0c1e6d 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift @@ -36,7 +36,7 @@ public struct LiveServerMessage: Sendable { /// The actual message sent from the server. public var messageType: MessageType - // TODO: document + /// Metadata on the usage of the cached content. public var usageMetadata: GenerateContentResponse.UsageMetadata? { serverMessage.usageMetadata } } diff --git a/FirebaseAI/Sources/Types/Public/Part.swift b/FirebaseAI/Sources/Types/Public/Part.swift index 1b26b50049a..37707d9de48 100644 --- a/FirebaseAI/Sources/Types/Public/Part.swift +++ b/FirebaseAI/Sources/Types/Public/Part.swift @@ -147,7 +147,10 @@ public struct FunctionCallPart: Part { public var isThought: Bool { _isThought ?? false } + /// Unique id of the function call. If present, the returned ``FunctionResponsePart`` + /// should have a matching `id` field. public var id: String? + /// Constructs a new function call part. /// /// > Note: A `FunctionCallPart` is typically received from the model, rather than created @@ -192,7 +195,7 @@ public struct FunctionResponsePart: Part { let _isThought: Bool? 
let thoughtSignature: String? - // TODO: add docs + /// Matching `id` for a ``FunctionCallPart``, if one was provided. public var id: String? { functionResponse.id } /// The name of the function that was called. @@ -214,7 +217,12 @@ public struct FunctionResponsePart: Part { ) } - // TODO: add docs for id param + /// Constructs a new `FunctionResponse`. + /// + /// - Parameters: + /// - name: The name of the function that was called. + /// - response: The function's response. + /// - id: Matching `id` for a ``FunctionCallPart``, if one was provided. public init(name: String, response: JSONObject, id: String? = nil) { self.init( FunctionResponse(name: name, response: response, id: id), From 465006e9769d0e03fc043e7bcb09a428e11adb49 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 9 Sep 2025 16:44:30 -0500 Subject: [PATCH 20/98] Add docs for app check extension --- FirebaseAI/Sources/Types/Internal/AppCheck.swift | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/AppCheck.swift b/FirebaseAI/Sources/Types/Internal/AppCheck.swift index 4da2defd1ba..3b6d784f636 100644 --- a/FirebaseAI/Sources/Types/Internal/AppCheck.swift +++ b/FirebaseAI/Sources/Types/Internal/AppCheck.swift @@ -14,9 +14,18 @@ import FirebaseAppCheckInterop -// TODO: document +/// Internal helper extension for fetching app check tokens. +/// +/// Provides a common means for fetching limited use tokens, and falling back to standard tokens +/// when it's disabled (or in debug mode). This also centrializes the error, since this method is +/// used in multiple places. extension AppCheckInterop { - // TODO: Document + /// Fetch the appcheck token. + /// + /// - Parameters: + /// - limitedUse: Should the token be a limited-use token, or a standard token. + /// - domain: A string dictating where this method is being called from. Used in any thrown + /// errors, to avoid hard-to-parse traces. 
func fetchAppCheckToken(limitedUse: Bool, domain: String) async throws -> FIRAppCheckTokenResultInterop { if limitedUse { From e51cc639aee152fd6b49bd4106b83c604a7fb5a6 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 9 Sep 2025 16:50:16 -0500 Subject: [PATCH 21/98] Add bug for session resumption --- FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index d11803d7dac..e7b57c78a2a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -205,7 +205,7 @@ actor LiveSessionService { "The server sent a message that we don't currently have a mapping for: \(response)" ) } - // TODO: (b/xxx) When we get the goingAway message (and auto session resumption is enabled) then call `connect` again + // TODO: (b/444045023) When we get the goingAway message (and auto session resumption is enabled) then call `connect` again } } catch { if let error = error as? 
WebSocketClosedError { From 3e58503ee65d6c9bddc0d95a88d2b5bf71e2ec66 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 10 Sep 2025 12:13:16 -0500 Subject: [PATCH 22/98] Use boolean to avoid warning --- FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index f64f467347a..77a3f83f345 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -46,7 +46,7 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe } func disconnect() { - if let closeError { return } + if closeError != nil { return } close(code: .goingAway, reason: nil) } From 55b8425ea8ce757fe05ea540acdb4c762b9a69cb Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 10 Sep 2025 12:13:53 -0500 Subject: [PATCH 23/98] Log going away instead of error --- FirebaseAI/Sources/AILog.swift | 1 + .../Internal/Live/LiveSessionService.swift | 30 ++++++------------- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/FirebaseAI/Sources/AILog.swift b/FirebaseAI/Sources/AILog.swift index e850abd397b..8f003b610c3 100644 --- a/FirebaseAI/Sources/AILog.swift +++ b/FirebaseAI/Sources/AILog.swift @@ -71,6 +71,7 @@ enum AILog { case liveSessionFailedToEncodeClientMessagePayload = 3020 case liveSessionFailedToSendClientMessage = 3021 case liveSessionUnexpectedResponse = 3022 + case liveSessionGoingAwaySoon = 3023 // SDK State Errors case generateContentResponseNoCandidates = 4000 diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index e7b57c78a2a..439a065dd4b 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -196,16 +196,23 @@ 
actor LiveSessionService { setupComplete.resume() } } else if let liveMessage = LiveServerMessage.tryFrom(response) { + if case let .goAway(message) = liveMessage.messageType { + // TODO: (b/444045023) When auto session resumption is enabled, call `connect` again + AILog.debug( + code: .liveSessionGoingAwaySoon, + "Session expires in: \(message.goAway.timeLeft ?? 0)" + ) + } + responseContinuation.yield(liveMessage) } else { // we don't raise an error, since this allows us to add support internally but not - // publicly we still log it in debug though, in case it's not expected + // publicly. We still log it in debug though, in case it's not expected. AILog.debug( code: .liveSessionUnsupportedMessage, "The server sent a message that we don't currently have a mapping for: \(response)" ) } - // TODO: (b/444045023) When we get the goingAway message (and auto session resumption is enabled) then call `connect` again } } catch { if let error = error as? WebSocketClosedError { @@ -364,25 +371,6 @@ public struct LiveSessionUnexpectedClosureError: Error, Sendable, CustomNSError } } -/// The live session exceeded the maximum session duration, and was closed. -/// -/// To learn more, to see the docs on [Maximum session duration](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live#maximum-session-duration)\. -// TODO: investigate if the server sends a specific message when this happens, or if we'll need to look at .goingAway and keep an internal note of it -public struct LiveSessionExceededTimeLimit: Error, Sendable, CustomNSError { - let underlyingError: WebSocketClosedError - - init(underlyingError: WebSocketClosedError) { - self.underlyingError = underlyingError - } - - public var errorUserInfo: [String: Any] { - [ - NSLocalizedDescriptionKey: "The live session exceeded the maximum session duration, and was permanently closed. 
Start a new session to continue.", - NSUnderlyingErrorKey: underlyingError, - ] - } -} - /// The live model refused our request to setup a live session. /// /// This can occur due to the model not supporting the requested response modalities, the project From 0c23c7d2b430cefff539ebe2189e793e393659e5 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 10 Sep 2025 12:20:27 -0500 Subject: [PATCH 24/98] Add additional docs for tasks --- .../Sources/Types/Internal/Live/LiveSessionService.swift | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 439a065dd4b..8e325d35bc3 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -53,8 +53,17 @@ actor LiveSessionService { private let jsonEncoder = JSONEncoder() private let jsonDecoder = JSONDecoder() + /// Task that doesn't complete until the server sends a setupComplete message. + /// + /// Used to hold off on sending messages until the server is ready. private var setupTask: Task + + /// Long running task that that wraps around the websocket, propogating messages through the + /// public stream. private var responsesTask: Task? + + /// Long running task that consumes user messages from the ``messageQueue`` and sends them through + /// the websocket. private var messageQueueTask: Task? 
init(modelResourceName: String, From 71acb558c6281125a742c7487fd2f2299fc1134b Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 10 Sep 2025 12:22:04 -0500 Subject: [PATCH 25/98] Revert andrew's prototyping changes --- .../Tests/TestApp/Sources/ContentView.swift | 31 +++---------------- 1 file changed, 4 insertions(+), 27 deletions(-) diff --git a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift index 37ef5fd527a..52af5939455 100644 --- a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift +++ b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift @@ -12,40 +12,17 @@ // See the License for the specific language governing permissions and // limitations under the License. -import FirebaseAI import SwiftUI struct ContentView: View { - // TODO: Revert changes in this file. For prototyping purposes only. - let liveModel: LiveGenerativeModel = { - // let firebaseAI = FirebaseAI.firebaseAI(backend: .vertexAI()) - let firebaseAI = FirebaseAI.firebaseAI() - return firebaseAI.liveModel( - modelName: "gemini-2.0-flash-live-001", - generationConfig: LiveGenerationConfig(responseModalities: [.text]) - ) - }() - - @State private var responses: [String] = [] - var body: some View { VStack { - List(responses, id: \.self) { - Text($0) - } + Image(systemName: "globe") + .imageScale(.large) + .foregroundStyle(.tint) + Text("Hello, world!") } .padding() - .task { - do { - let liveSession = liveModel.connect() - try await liveSession.sendMessage("Why is the sky blue?") - for try await response in liveSession.responses { - responses.append(String(describing: response)) - } - } catch { - print(error) - } - } } } From 2ccca179fc5622fa05c4accffeb3a502143b3247 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 10 Sep 2025 12:27:15 -0500 Subject: [PATCH 26/98] Add support for request options --- FirebaseAI/Sources/FirebaseAI.swift | 6 ++++-- .../Sources/Types/Internal/Live/LiveSessionService.swift | 6 +++++- 
.../Sources/Types/Public/Live/LiveGenerativeModel.swift | 6 +++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/FirebaseAI/Sources/FirebaseAI.swift b/FirebaseAI/Sources/FirebaseAI.swift index fcea43dbab4..0cf637bbcee 100644 --- a/FirebaseAI/Sources/FirebaseAI.swift +++ b/FirebaseAI/Sources/FirebaseAI.swift @@ -151,7 +151,7 @@ public final class FirebaseAI: Sendable { /// /// - Parameters: /// - modelName: The name of the Livemodel to use, for example - /// `"gemini-live-2.5-flash-preview"`; + /// `"gemini-live-2.5-flash-preview"`; /// see [model versions](https://firebase.google.com/docs/ai-logic/live-api?api=dev#models-that-support-capability) /// for a list of supported Live models. /// - generationConfig: The content generation parameters your model should use. @@ -159,6 +159,7 @@ public final class FirebaseAI: Sendable { /// - toolConfig: Tool configuration for any ``Tool`` specified in the request. /// - systemInstruction: Instructions that direct the model to behave a certain way; currently /// only text content is supported. + /// - requestOptions: Configuration parameters for sending requests to the backend. public func liveModel(modelName: String, generationConfig: LiveGenerationConfig? = nil, tools: [Tool]? 
= nil, @@ -172,7 +173,8 @@ public final class FirebaseAI: Sendable { generationConfig: generationConfig, tools: tools, toolConfig: toolConfig, - systemInstruction: systemInstruction + systemInstruction: systemInstruction, + requestOptions: requestOptions ) } diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 8e325d35bc3..99c96a48643 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -44,6 +44,7 @@ actor LiveSessionService { let urlSession: URLSession let apiConfig: APIConfig let firebaseInfo: FirebaseInfo + let requestOptions: RequestOptions let tools: [Tool]? let toolConfig: ToolConfig? let systemInstruction: ModelContent? @@ -73,7 +74,8 @@ actor LiveSessionService { firebaseInfo: FirebaseInfo, tools: [Tool]?, toolConfig: ToolConfig?, - systemInstruction: ModelContent?) { + systemInstruction: ModelContent?, + requestOptions: RequestOptions) { (responses, responseContinuation) = AsyncThrowingStream.makeStream() (messageQueue, messageQueueContinuation) = AsyncStream.makeStream() self.modelResourceName = modelResourceName @@ -84,6 +86,7 @@ actor LiveSessionService { self.tools = tools self.toolConfig = toolConfig self.systemInstruction = systemInstruction + self.requestOptions = requestOptions setupTask = Task {} } @@ -301,6 +304,7 @@ actor LiveSessionService { ) } var urlRequest = URLRequest(url: url) + urlRequest.timeoutInterval = requestOptions.timeout urlRequest.setValue(firebaseInfo.apiKey, forHTTPHeaderField: "x-goog-api-key") urlRequest.setValue( "\(GenerativeAIService.languageTag) \(GenerativeAIService.firebaseVersionTag)", diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift index fcf0bf712ee..d53fb58f13f 100644 --- 
a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift @@ -28,6 +28,7 @@ public final class LiveGenerativeModel { let toolConfig: ToolConfig? let systemInstruction: ModelContent? let urlSession: URLSession + let requestOptions: RequestOptions init(modelResourceName: String, firebaseInfo: FirebaseInfo, @@ -36,7 +37,8 @@ public final class LiveGenerativeModel { tools: [Tool]? = nil, toolConfig: ToolConfig? = nil, systemInstruction: ModelContent? = nil, - urlSession: URLSession = GenAIURLSession.default) { + urlSession: URLSession = GenAIURLSession.default, + requestOptions: RequestOptions) { self.modelResourceName = modelResourceName self.firebaseInfo = firebaseInfo self.apiConfig = apiConfig @@ -45,6 +47,7 @@ public final class LiveGenerativeModel { self.toolConfig = toolConfig self.systemInstruction = systemInstruction self.urlSession = urlSession + self.requestOptions = requestOptions } /// Start a ``LiveSession`` with the server for bidirectional streaming. @@ -60,6 +63,7 @@ public final class LiveGenerativeModel { tools: tools, toolConfig: toolConfig, systemInstruction: systemInstruction, + requestOptions: requestOptions ) await service.connect() From 00ebe135416d57dabf98d8a6a3ee6c552ea7827e Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 10 Sep 2025 12:34:50 -0500 Subject: [PATCH 27/98] Minor doc fixes --- .../Sources/Types/Internal/Live/LiveSessionService.swift | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 99c96a48643..3b750f85746 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -23,9 +23,8 @@ import Foundation /// Facilitates communication with the backend for a ``LiveSession``. 
/// /// Using an actor will make it easier to adopt session resumption, as we have an isolated place for -/// mainting mutablity, which is backed -/// by Swift concurrency implicity; allowing us to avoid various edge-case issues with dead-locks -/// and data races. +/// mainting mutablity, which is backed by Swift concurrency implicity; allowing us to avoid various +/// edge-case issues with dead-locks and data races. /// /// This mainly comes into play when we don't want to block developers from sending messages while a /// session is being reloaded. @@ -59,7 +58,7 @@ actor LiveSessionService { /// Used to hold off on sending messages until the server is ready. private var setupTask: Task - /// Long running task that that wraps around the websocket, propogating messages through the + /// Long running task that that wraps around the websocket, propogating messages through the /// public stream. private var responsesTask: Task? From 3c442a5db70783a8ad01cc7cad2ecd81ed47deeb Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 10 Sep 2025 13:02:51 -0500 Subject: [PATCH 28/98] Revert "Analytics 12.3.0 (#15310)" This reverts commit f0f9e4d4c14e600dd7c3fe5116c7a7990cbf4831. --- FirebaseAnalytics.podspec | 2 +- GoogleAppMeasurement.podspec | 4 ++-- Package.swift | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/FirebaseAnalytics.podspec b/FirebaseAnalytics.podspec index aa98391508a..bba40f73c84 100644 --- a/FirebaseAnalytics.podspec +++ b/FirebaseAnalytics.podspec @@ -13,7 +13,7 @@ Pod::Spec.new do |s| s.authors = 'Google, Inc.' 
s.source = { - :http => 'https://dl.google.com/firebase/ios/analytics/7f774173bfc50ea8/FirebaseAnalytics-12.3.0.tar.gz' + :http => 'https://dl.google.com/firebase/ios/analytics/20f7f19c421351ed/FirebaseAnalytics-12.2.0.tar.gz' } s.cocoapods_version = '>= 1.12.0' diff --git a/GoogleAppMeasurement.podspec b/GoogleAppMeasurement.podspec index 524d667ae7c..076e2641595 100644 --- a/GoogleAppMeasurement.podspec +++ b/GoogleAppMeasurement.podspec @@ -16,7 +16,7 @@ Pod::Spec.new do |s| s.authors = 'Google, Inc.' s.source = { - :http => 'https://dl.google.com/firebase/ios/analytics/1c0181b69fa16f29/GoogleAppMeasurement-12.3.0.tar.gz' + :http => 'https://dl.google.com/firebase/ios/analytics/47d80ee1ff340179/GoogleAppMeasurement-12.2.0.tar.gz' } s.cocoapods_version = '>= 1.12.0' @@ -39,7 +39,7 @@ Pod::Spec.new do |s| s.subspec 'Default' do |ss| ss.dependency 'GoogleAppMeasurement/Core', '12.3.0' ss.dependency 'GoogleAppMeasurement/IdentitySupport', '12.3.0' - ss.ios.dependency 'GoogleAdsOnDeviceConversion', '~> 3.0.0' + ss.ios.dependency 'GoogleAdsOnDeviceConversion', '2.3.0' end s.subspec 'Core' do |ss| diff --git a/Package.swift b/Package.swift index d2d8a2009b2..25bff49093a 100644 --- a/Package.swift +++ b/Package.swift @@ -329,8 +329,8 @@ let package = Package( ), .binaryTarget( name: "FirebaseAnalytics", - url: "https://dl.google.com/firebase/ios/swiftpm/12.3.0/FirebaseAnalytics.zip", - checksum: "a7fcb34227d6cc0b2db9b1d3f9dd844801e5a28217f20f1daae6c3d2b7d1e8e1" + url: "https://dl.google.com/firebase/ios/swiftpm/12.2.0/FirebaseAnalytics.zip", + checksum: "f1b07dabcdf3f2b6c495af72baa55e40672a625b8a1b6c631fb43ec74a2ec1ca" ), .testTarget( name: "AnalyticsSwiftUnit", @@ -1392,7 +1392,7 @@ func googleAppMeasurementDependency() -> Package.Dependency { return .package(url: appMeasurementURL, branch: "main") } - return .package(url: appMeasurementURL, exact: "12.3.0") + return .package(url: appMeasurementURL, exact: "12.2.0") } func abseilDependency() -> Package.Dependency { 
From dba2bc06d925bb9a78630ec43145d56ec8591721 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 17 Sep 2025 13:02:08 -0500 Subject: [PATCH 29/98] Remove default from video realtime --- FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index b9049db4b8c..de3d08e61e6 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -69,7 +69,7 @@ public final class LiveSession: Sendable { /// - Parameters: /// - video: Encoded video data, used to update the model on the client's conversation. /// - format: The format that the video was encoded in (eg; `mp4`, `webm`, `wmv`, etc.,). - public func sendVideoRealtime(video: Data, format: String = "mp4") async { + public func sendVideoRealtime(video: Data, format: String) async { let message = BidiGenerateContentRealtimeInput( video: InlineData(data: video, mimeType: "video/\(format)") ) From 79023490713b2d86cc2ca416a740de4b6b1a5187 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 17 Sep 2025 13:13:50 -0500 Subject: [PATCH 30/98] Add the close method --- .../Internal/Live/LiveSessionService.swift | 28 +++++++++---------- .../Types/Public/Live/LiveSession.swift | 11 ++++++++ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 3b750f85746..23eb8d631bd 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -125,6 +125,20 @@ actor LiveSessionService { } } + /// Cancel any running tasks and close the websocket. + /// + /// This method is idempotent; if it's already ran once, it will effectively be a no-op. 
+ public func close() { + setupTask.cancel() + responsesTask?.cancel() + messageQueueTask?.cancel() + webSocket?.disconnect() + + webSocket = nil + responsesTask = nil + messageQueueTask = nil + } + /// Start a fresh websocket to the backend, and listen for responses. /// /// Will hold off on sending any messages until the server sends a setupComplete mesage. @@ -269,20 +283,6 @@ actor LiveSessionService { } } - /// Cancel any running tasks and close the websocket. - /// - /// This method is idempotent; if it's already ran once, it will effectively be a no-op. - private func close() { - setupTask.cancel() - responsesTask?.cancel() - messageQueueTask?.cancel() - webSocket?.disconnect() - - webSocket = nil - responsesTask = nil - messageQueueTask = nil - } - /// Creates a websocket pointing to the backend. /// /// Will apply the required app check and auth headers, as the backend expects them. diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index de3d08e61e6..486c1d75e06 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -126,4 +126,15 @@ public final class LiveSession: Sendable { turnComplete: Bool? = nil) async { await sendContent([ModelContent(parts: parts)], turnComplete: turnComplete) } + + /// Permanently stop the conversation with the model, and close the connection to the server + /// + /// This method will be called automatically when the ``LiveSession`` is deinitialized, but this method + /// can be called manually to explicitly end the session. + /// + /// Attempting to receive content from a closed session will cause a ``LiveSessionUnexpectedClosureError`` error + /// to be thrown. 
+ public func close() async { + await service.close() + } } From 24ef7f060771bc3aea7ed4ab24ee4900210dce60 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 17 Sep 2025 13:14:01 -0500 Subject: [PATCH 31/98] Add a todo about adding start --- FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 486c1d75e06..7dbd35f73e6 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -137,4 +137,6 @@ public final class LiveSession: Sendable { public func close() async { await service.close() } + + // TODO: b(445716402) Add a start method when we support session resumption } From c252b0a8d4c3d9d80a5eb5700bd5b911631fb4ae Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 17 Sep 2025 13:14:31 -0500 Subject: [PATCH 32/98] Fix missing id usage --- FirebaseAI/Sources/Types/Public/Part.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Part.swift b/FirebaseAI/Sources/Types/Public/Part.swift index 37707d9de48..bcfa6d42148 100644 --- a/FirebaseAI/Sources/Types/Public/Part.swift +++ b/FirebaseAI/Sources/Types/Public/Part.swift @@ -149,7 +149,7 @@ public struct FunctionCallPart: Part { /// Unique id of the function call. If present, the returned ``FunctionResponsePart`` /// should have a matching `id` field. - public var id: String? + public var id: String? { functionCall.id } /// Constructs a new function call part. 
/// From b469e9c0500fcbdd3d41ecc6a05027e4790e6f65 Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 17 Sep 2025 13:15:57 -0500 Subject: [PATCH 33/98] Formatting --- FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 7dbd35f73e6..728217ad597 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -129,11 +129,11 @@ public final class LiveSession: Sendable { /// Permanently stop the conversation with the model, and close the connection to the server /// - /// This method will be called automatically when the ``LiveSession`` is deinitialized, but this method - /// can be called manually to explicitly end the session. + /// This method will be called automatically when the ``LiveSession`` is deinitialized, but this + /// method can be called manually to explicitly end the session. /// - /// Attempting to receive content from a closed session will cause a ``LiveSessionUnexpectedClosureError`` error - /// to be thrown. + /// Attempting to receive content from a closed session will cause a + /// ``LiveSessionUnexpectedClosureError`` error to be thrown. 
public func close() async { await service.close() } From 12f9f278c31def208c465f2d39d2c37a705c0cca Mon Sep 17 00:00:00 2001 From: Daymon Date: Wed, 17 Sep 2025 13:20:02 -0500 Subject: [PATCH 34/98] Temporarily add support for function behavior --- FirebaseAI/Sources/Tool.swift | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/FirebaseAI/Sources/Tool.swift b/FirebaseAI/Sources/Tool.swift index 78dc8ef9443..eefaf700f73 100644 --- a/FirebaseAI/Sources/Tool.swift +++ b/FirebaseAI/Sources/Tool.swift @@ -29,6 +29,9 @@ public struct FunctionDeclaration: Sendable { /// Describes the parameters to this function; must be of type `DataType.object`. let parameters: Schema? + // TODO: remove (added for testing) + let behavior: FunctionBehavior? + /// Constructs a new `FunctionDeclaration`. /// /// - Parameters: @@ -40,8 +43,21 @@ public struct FunctionDeclaration: Sendable { /// calls; by default, all parameters are considered required. public init(name: String, description: String, parameters: [String: Schema], optionalParameters: [String] = []) { + self.init( + name: name, + description: description, + parameters: parameters, + optionalParameters: optionalParameters, + functionBehavior: nil + ) + } + + // TODO: remove (added for testing) + public init(name: String, description: String, parameters: [String: Schema], + optionalParameters: [String] = [], functionBehavior: FunctionBehavior? 
= nil) { self.name = name self.description = description + behavior = functionBehavior self.parameters = Schema.object( properties: parameters, optionalProperties: optionalParameters, @@ -50,6 +66,12 @@ public struct FunctionDeclaration: Sendable { } } +// TODO: remove (added for testing) +public enum FunctionBehavior: String, Sendable, Encodable { + case blocking = "BLOCKING" + case nonBlocking = "NON_BLOCKING" +} + /// A tool that allows the generative model to connect to Google Search to access and incorporate /// up-to-date information from the web into its responses. /// @@ -200,6 +222,7 @@ extension FunctionDeclaration: Encodable { case name case description case parameters + case behavior // TODO: remove (added for testing) } public func encode(to encoder: Encoder) throws { @@ -207,6 +230,7 @@ extension FunctionDeclaration: Encodable { try container.encode(name, forKey: .name) try container.encode(description, forKey: .description) try container.encode(parameters, forKey: .parameters) + try container.encode(behavior, forKey: .behavior) // TODO: remove (added for testing) } } From c4339fa99b24603b5633aadb3579aae1ca7b8ad9 Mon Sep 17 00:00:00 2001 From: Daymon Date: Thu, 25 Sep 2025 12:40:11 -0500 Subject: [PATCH 35/98] Add official support for transcripts --- .../BidiGenerateContentServerContent.swift | 2 + .../Live/BidiGenerateContentSetup.swift | 20 ++-- .../BidiGenerateContentTranscription.swift | 1 - .../Internal/Live/BidiGenerationConfig.swift | 46 +++++++++ ...echConfig.swift => BidiSpeechConfig.swift} | 2 +- .../Internal/Live/LiveSessionService.swift | 5 +- .../Live/AudioTranscriptionConfig.swift | 30 ++++++ .../Public/Live/LiveGenerationConfig.swift | 94 ++++++++----------- .../Types/Public/Live/LiveServerContent.swift | 22 +++-- ...anscript.swift => LiveTranscription.swift} | 6 +- ...eSpeechConfig.swift => SpeechConfig.swift} | 8 +- 11 files changed, 158 insertions(+), 78 deletions(-) create mode 100644 
FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift rename FirebaseAI/Sources/Types/Internal/Live/{SpeechConfig.swift => BidiSpeechConfig.swift} (95%) create mode 100644 FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift rename FirebaseAI/Sources/Types/Public/Live/{LiveTranscript.swift => LiveTranscription.swift} (80%) rename FirebaseAI/Sources/Types/Public/Live/{LiveSpeechConfig.swift => SpeechConfig.swift} (92%) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift index e52d32be8bf..98a4b8c42e9 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift @@ -51,5 +51,7 @@ struct BidiGenerateContentServerContent: Decodable, Sendable { /// Metadata specifies sources used to ground generated content. let groundingMetadata: GroundingMetadata? + let inputTranscription: BidiGenerateContentTranscription? + let outputTranscription: BidiGenerateContentTranscription? } diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index 9f154cb2292..7a0ff6ba4f5 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -29,7 +29,7 @@ struct BidiGenerateContentSetup: Encodable { let model: String /// Generation config. - let generationConfig: LiveGenerationConfig? + let generationConfig: BidiGenerationConfig? /// The user provided system instructions for the model. /// Note: only text should be used in parts and content in each part will be @@ -48,18 +48,24 @@ struct BidiGenerateContentSetup: Encodable { /// Configures the handling of realtime input. let realtimeInputConfig: RealtimeInputConfig? 
- let inputAudioTranscription: AudioTranscriptionConfig? + /// Input transcription. The transcription is independent to the model turn + /// which means it doesn't imply any ordering between transcription and model + /// turn. + let inputAudioTranscription: BidiAudioTranscriptionConfig? - let outputAudioTranscription: AudioTranscriptionConfig? + /// Output transcription. The transcription is independent to the model turn + /// which means it doesn't imply any ordering between transcription and model + /// turn. + let outputAudioTranscription: BidiAudioTranscriptionConfig? init(model: String, - generationConfig: LiveGenerationConfig? = nil, + generationConfig: BidiGenerationConfig? = nil, systemInstruction: ModelContent? = nil, tools: [Tool]? = nil, toolConfig: ToolConfig? = nil, realtimeInputConfig: RealtimeInputConfig? = nil, - inputAudioTranscription: AudioTranscriptionConfig? = nil, - outputAudioTranscription: AudioTranscriptionConfig? = nil) { + inputAudioTranscription: BidiAudioTranscriptionConfig? = nil, + outputAudioTranscription: BidiAudioTranscriptionConfig? = nil) { self.model = model self.generationConfig = generationConfig self.systemInstruction = systemInstruction @@ -71,4 +77,4 @@ struct BidiGenerateContentSetup: Encodable { } } -struct AudioTranscriptionConfig: Encodable {} +struct BidiAudioTranscriptionConfig: Encodable {} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift index 6ce345538a1..4c5cb965b2a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift @@ -15,5 +15,4 @@ @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentTranscription: Decodable, Sendable { let text: String? - let finished: Bool? 
} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift new file mode 100644 index 00000000000..918a3693769 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift @@ -0,0 +1,46 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Configuration options for live content generation. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +internal struct BidiGenerationConfig: Encodable, Sendable { + let temperature: Float? + let topP: Float? + let topK: Int? + let candidateCount: Int? + let maxOutputTokens: Int? + let presencePenalty: Float? + let frequencyPenalty: Float? + let responseModalities: [ResponseModality]? + let speechConfig: BidiSpeechConfig? + + init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, + candidateCount: Int? = nil, maxOutputTokens: Int? = nil, + presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, + responseModalities: [ResponseModality]? = nil, + speechConfig: BidiSpeechConfig? 
= nil + ) { + self.temperature = temperature + self.topP = topP + self.topK = topK + self.candidateCount = candidateCount + self.maxOutputTokens = maxOutputTokens + self.presencePenalty = presencePenalty + self.frequencyPenalty = frequencyPenalty + self.responseModalities = responseModalities + self.speechConfig = speechConfig + } +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/SpeechConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiSpeechConfig.swift similarity index 95% rename from FirebaseAI/Sources/Types/Internal/Live/SpeechConfig.swift rename to FirebaseAI/Sources/Types/Internal/Live/BidiSpeechConfig.swift index c41ea5dfc00..be27b499c31 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/SpeechConfig.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiSpeechConfig.swift @@ -15,7 +15,7 @@ import Foundation /// Speech generation config. -struct SpeechConfig: Encodable, Sendable { +struct BidiSpeechConfig: Encodable, Sendable { /// The configuration for the speaker to use. 
let voiceConfig: VoiceConfig diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 23eb8d631bd..328d644b557 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -180,11 +180,12 @@ actor LiveSessionService { do { let setup = BidiGenerateContentSetup( model: modelResourceName, - generationConfig: generationConfig, + generationConfig: generationConfig?.bidiGenerationConfig, systemInstruction: systemInstruction, tools: tools, toolConfig: toolConfig, - outputAudioTranscription: AudioTranscriptionConfig() + inputAudioTranscription: generationConfig?.inputAudioTranscription, + outputAudioTranscription: generationConfig?.outputAudioTranscription ) let data = try jsonEncoder.encode(BidiGenerateContentClientMessage.setup(setup)) try await webSocket.send(.data(data)) diff --git a/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift b/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift new file mode 100644 index 00000000000..7b0552869d0 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift @@ -0,0 +1,30 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/// Configuration options for audio transcriptions when communicating with a live model. 
+/// +/// While there are not currently any options, this will likely change in the future. For now, just providing +/// an instance of this struct will enable audio transcriptions for the corresponding input or output fields. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct AudioTranscriptionConfig: Sendable { + let audioTranscriptionConfig: BidiAudioTranscriptionConfig + + init(_ audioTranscriptionConfig: BidiAudioTranscriptionConfig) { + self.audioTranscriptionConfig = audioTranscriptionConfig + } + + public init() { + self.init(BidiAudioTranscriptionConfig()) + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift index 26e9b2bb951..1a271ff49d1 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift @@ -17,32 +17,9 @@ import Foundation /// Configuration options for live content generation. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct LiveGenerationConfig: Sendable { - /// Controls the degree of randomness in token selection. - let temperature: Float? - - /// Controls diversity of generated text. - let topP: Float? - - /// Limits the number of highest probability words considered. - let topK: Int? - - /// The number of response variations to return. - let candidateCount: Int? - - /// Maximum number of tokens that can be generated in the response. - let maxOutputTokens: Int? - - /// Controls the likelihood of repeating the same words or phrases already generated in the text. - let presencePenalty: Float? - - /// Controls the likelihood of repeating words, with the penalty increasing for each repetition. - let frequencyPenalty: Float? - - /// Supported modalities of the response. - let responseModalities: [ResponseModality]? - - /// Controls the voice of the model during conversation. 
- let speechConfig: SpeechConfig? + let bidiGenerationConfig: BidiGenerationConfig + let inputAudioTranscription: BidiAudioTranscriptionConfig? + let outputAudioTranscription: BidiAudioTranscriptionConfig? /// Creates a new ``LiveGenerationConfig`` value. /// @@ -125,38 +102,49 @@ public struct LiveGenerationConfig: Sendable { /// > backwards-incompatible ways. /// - speechConfig: Controls the voice of the model, when streaming `audio` via /// ``ResponseModality``. + /// - inputAudioTranscription: Configures (and enables) input transcriptions when streaming to the model. + /// + /// Input transcripts are the model's interprutation of audio data sent to it, and they are populated in model responses via ``LiveServerContent``. + /// When this fields is set to `nil`, input transcripts are not populated in model responses. + /// - outputAudioTranscription: Configures (and enables) output transcriptions when streaming to the model. + /// + /// Output transcripts are text representations of the audio the model is sending to the client, and they are populated in model responses via ``LiveServerContent`` + /// When this fields is set to `nil`, output transcripts are not populated in model responses. + /// + /// > Important: Transcripts are independent to the model turn. This means transcripts may come earlier or later than when + /// > the model sends the corresponding audio responses. public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, candidateCount: Int? = nil, maxOutputTokens: Int? = nil, presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, responseModalities: [ResponseModality]? = nil, - speechConfig: LiveSpeechConfig? = nil) { - // Explicit init because otherwise if we re-arrange the above variables it changes the API - // surface. 
- self.temperature = temperature - self.topP = topP - self.topK = topK - self.candidateCount = candidateCount - self.maxOutputTokens = maxOutputTokens - self.presencePenalty = presencePenalty - self.frequencyPenalty = frequencyPenalty - self.responseModalities = responseModalities - self.speechConfig = speechConfig?.speechConfig + speechConfig: SpeechConfig? = nil, + inputAudioTranscription: AudioTranscriptionConfig? = nil, + outputAudioTranscription: AudioTranscriptionConfig? = nil + ) { + self.init( + BidiGenerationConfig( + temperature: temperature, + topP: topP, + topK: topK, + candidateCount: candidateCount, + maxOutputTokens: maxOutputTokens, + presencePenalty: presencePenalty, + frequencyPenalty: frequencyPenalty, + responseModalities: responseModalities, + speechConfig: speechConfig?.speechConfig + ), + inputAudioTranscription: inputAudioTranscription?.audioTranscriptionConfig, + outputAudioTranscription: outputAudioTranscription?.audioTranscriptionConfig + ) } -} - -// MARK: - Codable Conformances -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -extension LiveGenerationConfig: Encodable { - enum CodingKeys: String, CodingKey { - case temperature - case topP - case topK - case candidateCount - case maxOutputTokens - case presencePenalty - case frequencyPenalty - case responseModalities - case speechConfig + init( + _ bidiGenerationConfig: BidiGenerationConfig, + inputAudioTranscription: BidiAudioTranscriptionConfig? = nil, + outputAudioTranscription: BidiAudioTranscriptionConfig? 
= nil + ) { + self.bidiGenerationConfig = bidiGenerationConfig + self.inputAudioTranscription = inputAudioTranscription + self.outputAudioTranscription = outputAudioTranscription } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index 398a99099c2..7a6a7b81632 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -60,13 +60,21 @@ public struct LiveServerContent: Sendable { /// Metadata specifing the sources used to ground generated content. public var groundingMetadata: GroundingMetadata? { serverContent.groundingMetadata } - // TODO: remove - public var transcript: LiveTranscript? { - if let transcript = serverContent.outputTranscription { - LiveTranscript(transcript) - } else { - nil - } + /// The model's interpretation of what the client said in an audio message. + /// + /// This field is only populated when an ``AudioTranscriptionConfig`` is provided to ``LiveGenerationConfig``. + public var inputTranscription: LiveTranscription? { + serverContent.inputTranscription.map { LiveTranscription($0) } + } + + /// Transcription matching the model's audio response. + /// + /// This field is only populated when an ``AudioTranscriptionConfig`` is provided to ``LiveGenerationConfig``. + /// + /// > Important: Transcripts are independent to the model turn. This means transcripts may come earlier or later than when + /// > the model sends the corresponding audio responses. + public var outputTranscription: LiveTranscription? 
{ + serverContent.outputTranscription.map { LiveTranscription($0) } } init(_ serverContent: BidiGenerateContentServerContent) { diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveTranscript.swift b/FirebaseAI/Sources/Types/Public/Live/LiveTranscription.swift similarity index 80% rename from FirebaseAI/Sources/Types/Public/Live/LiveTranscript.swift rename to FirebaseAI/Sources/Types/Public/Live/LiveTranscription.swift index 8545b669976..72dec9d21f7 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveTranscript.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveTranscription.swift @@ -12,12 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -// TODO: remove +/// Text transcription of some audio form during a live interaction with the model. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -public struct LiveTranscript: Sendable { +public struct LiveTranscription: Sendable { let transcript: BidiGenerateContentTranscription + /// Text representing the model's interpretation of what the audio said. public var text: String? { transcript.text } - public var finished: Bool? { transcript.finished } init(_ transcript: BidiGenerateContentTranscription) { self.transcript = transcript diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSpeechConfig.swift b/FirebaseAI/Sources/Types/Public/Live/SpeechConfig.swift similarity index 92% rename from FirebaseAI/Sources/Types/Public/Live/LiveSpeechConfig.swift rename to FirebaseAI/Sources/Types/Public/Live/SpeechConfig.swift index 7c637bfe85e..6e4497b1d36 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSpeechConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/SpeechConfig.swift @@ -16,10 +16,10 @@ import Foundation /// Configuration for controlling the voice of the model during conversation. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -public struct LiveSpeechConfig: Sendable { - let speechConfig: SpeechConfig +public struct SpeechConfig: Sendable { + let speechConfig: BidiSpeechConfig - init(_ speechConfig: SpeechConfig) { + init(_ speechConfig: BidiSpeechConfig) { self.speechConfig = speechConfig } @@ -37,7 +37,7 @@ public struct LiveSpeechConfig: Sendable { /// [Supported languages](https://ai.google.dev/gemini-api/docs/speech-generation#languages)\. public init(voiceName: String, languageCode: String? = nil) { self.init( - SpeechConfig( + BidiSpeechConfig( voiceConfig: .prebuiltVoiceConfig(.init(voiceName: voiceName)), languageCode: languageCode ) From 9527026d7d39009aa0bc0d449d9dbc4532535098 Mon Sep 17 00:00:00 2001 From: Daymon Date: Thu, 25 Sep 2025 12:44:04 -0500 Subject: [PATCH 36/98] Formatting --- .../Internal/Live/BidiGenerationConfig.swift | 11 +++---- .../Live/AudioTranscriptionConfig.swift | 5 +-- .../Public/Live/LiveGenerationConfig.swift | 31 ++++++++++--------- .../Types/Public/Live/LiveServerContent.swift | 10 +++--- 4 files changed, 30 insertions(+), 27 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift index 918a3693769..5226e2ec79a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift @@ -16,7 +16,7 @@ import Foundation /// Configuration options for live content generation. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -internal struct BidiGenerationConfig: Encodable, Sendable { +struct BidiGenerationConfig: Encodable, Sendable { let temperature: Float? let topP: Float? let topK: Int? @@ -28,11 +28,10 @@ internal struct BidiGenerationConfig: Encodable, Sendable { let speechConfig: BidiSpeechConfig? init(temperature: Float? = nil, topP: Float? = nil, topK: Int? 
= nil, - candidateCount: Int? = nil, maxOutputTokens: Int? = nil, - presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, - responseModalities: [ResponseModality]? = nil, - speechConfig: BidiSpeechConfig? = nil - ) { + candidateCount: Int? = nil, maxOutputTokens: Int? = nil, + presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, + responseModalities: [ResponseModality]? = nil, + speechConfig: BidiSpeechConfig? = nil) { self.temperature = temperature self.topP = topP self.topK = topK diff --git a/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift b/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift index 7b0552869d0..7058e6d6091 100644 --- a/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift @@ -14,8 +14,9 @@ /// Configuration options for audio transcriptions when communicating with a live model. /// -/// While there are not currently any options, this will likely change in the future. For now, just providing -/// an instance of this struct will enable audio transcriptions for the corresponding input or output fields. +/// While there are not currently any options, this will likely change in the future. For now, just +/// providing an instance of this struct will enable audio transcriptions for the corresponding +/// input or output fields. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct AudioTranscriptionConfig: Sendable { let audioTranscriptionConfig: BidiAudioTranscriptionConfig diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift index 1a271ff49d1..a1e5398a044 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift @@ -102,25 +102,28 @@ public struct LiveGenerationConfig: Sendable { /// > backwards-incompatible ways. /// - speechConfig: Controls the voice of the model, when streaming `audio` via /// ``ResponseModality``. - /// - inputAudioTranscription: Configures (and enables) input transcriptions when streaming to the model. + /// - inputAudioTranscription: Configures (and enables) input transcriptions when streaming to + /// the model. /// - /// Input transcripts are the model's interprutation of audio data sent to it, and they are populated in model responses via ``LiveServerContent``. - /// When this fields is set to `nil`, input transcripts are not populated in model responses. - /// - outputAudioTranscription: Configures (and enables) output transcriptions when streaming to the model. + /// Input transcripts are the model's interpretation of audio data sent to it, and they are + /// populated in model responses via ``LiveServerContent``. When this field is set to `nil`, + /// input transcripts are not populated in model responses. + /// - outputAudioTranscription: Configures (and enables) output transcriptions when streaming to + /// the model. /// - /// Output transcripts are text representations of the audio the model is sending to the client, and they are populated in model responses via ``LiveServerContent`` - /// When this fields is set to `nil`, output transcripts are not populated in model responses.
+ /// Output transcripts are text representations of the audio the model is sending to the + /// client, and they are populated in model responses via ``LiveServerContent``. When this + /// field is set to `nil`, output transcripts are not populated in model responses. /// - /// > Important: Transcripts are independent to the model turn. This means transcripts may come earlier or later than when - /// > the model sends the corresponding audio responses. + /// > Important: Transcripts are independent to the model turn. This means transcripts may + /// > come earlier or later than when the model sends the corresponding audio responses. public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, candidateCount: Int? = nil, maxOutputTokens: Int? = nil, presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, responseModalities: [ResponseModality]? = nil, speechConfig: SpeechConfig? = nil, inputAudioTranscription: AudioTranscriptionConfig? = nil, - outputAudioTranscription: AudioTranscriptionConfig? = nil - ) { + outputAudioTranscription: AudioTranscriptionConfig? = nil) { self.init( BidiGenerationConfig( temperature: temperature, @@ -138,11 +141,9 @@ public struct LiveGenerationConfig: Sendable { ) } - init( - _ bidiGenerationConfig: BidiGenerationConfig, - inputAudioTranscription: BidiAudioTranscriptionConfig? = nil, - outputAudioTranscription: BidiAudioTranscriptionConfig? = nil - ) { + init(_ bidiGenerationConfig: BidiGenerationConfig, + inputAudioTranscription: BidiAudioTranscriptionConfig? = nil, + outputAudioTranscription: BidiAudioTranscriptionConfig?
= nil) { self.bidiGenerationConfig = bidiGenerationConfig self.inputAudioTranscription = inputAudioTranscription self.outputAudioTranscription = outputAudioTranscription diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index 7a6a7b81632..ad232456368 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -62,17 +62,19 @@ public struct LiveServerContent: Sendable { /// The model's interpretation of what the client said in an audio message. /// - /// This field is only populated when an ``AudioTranscriptionConfig`` is provided to ``LiveGenerationConfig``. + /// This field is only populated when an ``AudioTranscriptionConfig`` is provided to + /// ``LiveGenerationConfig``. public var inputTranscription: LiveTranscription? { serverContent.inputTranscription.map { LiveTranscription($0) } } /// Transcription matching the model's audio response. /// - /// This field is only populated when an ``AudioTranscriptionConfig`` is provided to ``LiveGenerationConfig``. + /// This field is only populated when an ``AudioTranscriptionConfig`` is provided to + /// ``LiveGenerationConfig``. /// - /// > Important: Transcripts are independent to the model turn. This means transcripts may come earlier or later than when - /// > the model sends the corresponding audio responses. + /// > Important: Transcripts are independent to the model turn. This means transcripts may + /// > come earlier or later than when the model sends the corresponding audio responses. public var outputTranscription: LiveTranscription? 
{ serverContent.outputTranscription.map { LiveTranscription($0) } } From 48ce789b909028683f9323d8279e03f247a356fa Mon Sep 17 00:00:00 2001 From: Daymon Date: Thu, 25 Sep 2025 12:47:29 -0500 Subject: [PATCH 37/98] Add changelog entry --- FirebaseAI/CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/FirebaseAI/CHANGELOG.md b/FirebaseAI/CHANGELOG.md index 109e8f83318..745e6360e71 100644 --- a/FirebaseAI/CHANGELOG.md +++ b/FirebaseAI/CHANGELOG.md @@ -5,6 +5,12 @@ - [fixed] Fixed a decoding error when generating images with the `gemini-2.5-flash-image-preview` model using `generateContentStream` or `sendMessageStream` with the Gemini Developer API. (#15262) +- [feature] Added support for the Live API, which allows bidirectional + communication with the model in realtime. + + To get started with the Live API, see the Firebase docs on + [Bidirectional streaming using the Gemini Live API](https://firebase.google.com/docs/ai-logic/live-api). + (#15309) # 12.2.0 - [feature] Added support for returning thought summaries, which are synthesized From 240b94eb75205b17123b77687fbb2ce400646ade Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 13:13:55 -0500 Subject: [PATCH 38/98] Add protoduration support --- FirebaseAI/Sources/AILog.swift | 4 + .../Sources/Types/Internal/Live/GoAway.swift | 2 +- .../Types/Internal/ProtoDuration.swift | 112 ++++++++++++++++++ .../Types/Public/Live/LiveServerGoAway.swift | 2 +- 4 files changed, 118 insertions(+), 2 deletions(-) create mode 100644 FirebaseAI/Sources/Types/Internal/ProtoDuration.swift diff --git a/FirebaseAI/Sources/AILog.swift b/FirebaseAI/Sources/AILog.swift index 8f003b610c3..65ec0d6a46b 100644 --- a/FirebaseAI/Sources/AILog.swift +++ b/FirebaseAI/Sources/AILog.swift @@ -72,6 +72,10 @@ enum AILog { case liveSessionFailedToSendClientMessage = 3021 case liveSessionUnexpectedResponse = 3022 case liveSessionGoingAwaySoon = 3023 + case decodedMissingProtoDurationSuffix = 3024 + case 
decodedInvalidProtoDurationString = 3025 + case decodedInvalidProtoDurationSeconds = 3026 + case decodedInvalidProtoDurationNanoseconds = 3027 // SDK State Errors case generateContentResponseNoCandidates = 4000 diff --git a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift index 573f89133f6..6fa046e7e61 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift @@ -20,5 +20,5 @@ struct GoAway: Decodable, Sendable { /// The remaining time before the connection will be terminated as ABORTED. /// The minimal time returned here is specified differently together with /// the rate limits for a given model. - let timeLeft: TimeInterval? + let timeLeft: ProtoDuration? } diff --git a/FirebaseAI/Sources/Types/Internal/ProtoDuration.swift b/FirebaseAI/Sources/Types/Internal/ProtoDuration.swift new file mode 100644 index 00000000000..1dac21d6429 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/ProtoDuration.swift @@ -0,0 +1,112 @@ +// Copyright 2024 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Represents a signed, fixed-length span of time represented +/// as a count of seconds and fractions of seconds at nanosecond +/// resolution. +/// +/// This represents a +/// [`google.protobuf.duration`](https://protobuf.dev/reference/protobuf/google.protobuf/#duration). 
+struct ProtoDuration { + /// Signed seconds of the span of time. + /// + /// Must be from -315,576,000,000 to +315,576,000,000 inclusive. + /// + /// Note: these bounds are computed from: + /// 60 sec/min * 60 min/hr * 24 hr/day * 365.25 days/year * 10000 years + let seconds: Int64 + + /// Signed fractions of a second at nanosecond resolution of the span of time. + /// + /// Durations less than one second are represented with a 0 + /// `seconds` field and a positive or negative `nanos` field. + /// + /// For durations of one second or more, a non-zero value for the `nanos` field must be + /// of the same sign as the `seconds` field. Must be from -999,999,999 + /// to +999,999,999 inclusive. + let nanos: Int32 + + /// Returns a `TimeInterval` representation of the `ProtoDuration`. + var timeInterval: TimeInterval { + return TimeInterval(seconds) + TimeInterval(nanos) / 1_000_000_000 + } +} + +// MARK: - Codable Conformance + +extension ProtoDuration: Decodable { + init(from decoder: any Decoder) throws { + var text = try decoder.singleValueContainer().decode(String.self) + if text.last != "s" { + AILog.warning( + code: .decodedMissingProtoDurationSuffix, + "Missing 's' at end of proto duration: \(text)." + ) + } else { + text.removeLast() + } + + let seconds: String + let nanoseconds: String + + let maybeSplit = text.split(separator: ".") + if maybeSplit.count > 2 { + AILog.warning( + code: .decodedInvalidProtoDurationString, + "Too many decimal places in proto duration (expected only 1): \(maybeSplit)." + ) + throw DecodingError.dataCorrupted(.init( + codingPath: [], + debugDescription: "Invalid proto duration string: \(text)" + )) + } + + if maybeSplit.count == 2 { + seconds = String(maybeSplit[0]) + nanoseconds = String(maybeSplit[1]) + } else { + seconds = text + nanoseconds = "0" + } + + guard let secs = Int64(seconds) else { + AILog.warning( + code: .decodedInvalidProtoDurationSeconds, + "Failed to parse the seconds to an Int64: \(seconds)." 
+ ) + + throw DecodingError.dataCorrupted(.init( + codingPath: [], + debugDescription: "Invalid proto duration seconds: \(text)" + )) + } + + guard let nanos = Int32(nanoseconds) else { + AILog.warning( + code: .decodedInvalidProtoDurationNanoseconds, + "Failed to parse the nanoseconds to an Int32: \(nanoseconds)." + ) + + throw DecodingError.dataCorrupted(.init( + codingPath: [], + debugDescription: "Invalid proto duration nanoseconds: \(text)" + )) + } + + self.seconds = secs + self.nanos = nanos + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift index 679c38a63b6..1f3950a4869 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift @@ -23,7 +23,7 @@ public struct LiveServerGoAway: Sendable { /// The remaining time before the connection will be terminated as ABORTED. /// The minimal time returned here is specified differently together with /// the rate limits for a given model. - public var timeLeft: TimeInterval? { goAway.timeLeft } + public var timeLeft: TimeInterval? 
{ goAway.timeLeft?.timeInterval } init(_ goAway: GoAway) { self.goAway = goAway From 5670c0969588fdef359a8dfdfb786fa0bbc2b0ce Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 13:20:17 -0500 Subject: [PATCH 39/98] Remove unnecessary structs --- .../Internal/Live/ActivityHandling.swift | 35 ------------ .../Types/Internal/Live/EndSensitivity.swift | 33 ----------- .../Internal/Live/RealtimeInputConfig.swift | 55 ------------------- .../Internal/Live/StartSensitivity.swift | 33 ----------- .../Types/Internal/Live/TurnCoverage.swift | 36 ------------ 5 files changed, 192 deletions(-) delete mode 100644 FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift delete mode 100644 FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift delete mode 100644 FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift delete mode 100644 FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift delete mode 100644 FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift diff --git a/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift b/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift deleted file mode 100644 index 26867a98925..00000000000 --- a/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2025 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import Foundation - -/// The different ways of handling user activity. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -public struct ActivityHandling: EncodableProtoEnum, Hashable, Sendable { - enum Kind: String { - case interrupts = "START_OF_ACTIVITY_INTERRUPTS" - case noInterrupt = "NO_INTERRUPTION" - } - - /// If true, start of activity will interrupt the model's response (also - /// called "barge in"). The model's current response will be cut-off in the - /// moment of the interruption. This is the default behavior. - public static let interrupts = ActivityHandling(kind: .interrupts) - - /// The model's response will not be interrupted. - public static let noInterrupt = ActivityHandling(kind: .noInterrupt) - - /// Returns the raw string representation of the `ActivityHandling` value. - public let rawValue: String -} diff --git a/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift b/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift deleted file mode 100644 index bacf814794d..00000000000 --- a/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2025 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import Foundation - -/// End of speech sensitivity. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -struct EndSensitivity: EncodableProtoEnum, Hashable, Sendable { - enum Kind: String { - case high = "END_SENSITIVITY_HIGH" - case low = "END_SENSITIVITY_LOW" - } - - /// Automatic detection will end speech more often. - public static let high = EndSensitivity(kind: .high) - - /// Automatic detection will end speech less often. - public static let low = EndSensitivity(kind: .low) - - /// Returns the raw string representation of the `EndSensitivity` value. - public let rawValue: String -} diff --git a/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift deleted file mode 100644 index 08bcfe076f8..00000000000 --- a/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift +++ /dev/null @@ -1,55 +0,0 @@ -// Copyright 2025 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import Foundation - -/// Configures the realtime input behavior in `BidiGenerateContent`. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -struct RealtimeInputConfig: Encodable { - /// Configures automatic detection of activity. - struct AutomaticActivityDetection: Encodable { - /// If enabled, detected voice and text input count as activity. If - /// disabled, the client must send activity signals. - let disabled: Bool? - - /// Determines how likely speech is to be detected. 
- let startOfSpeechSensitivity: StartSensitivity? - - /// Determines how likely detected speech is ended. - let endOfSpeechSensitivity: EndSensitivity? - - /// The required duration of detected speech before start-of-speech is - /// committed. The lower this value the more sensitive the start-of-speech - /// detection is and the shorter speech can be recognized. However, this - /// also increases the probability of false positives. - let prefixPaddingMS: Int? - - /// The required duration of detected silence (or non-speech) before - // end-of-speech is committed. The larger this value, the longer speech - // gaps can be without interrupting the user's activity but this will - // increase the model's latency. - let silenceDurationMS: Int? - } - - /// If not set, automatic activity detection is enabled by default. If - /// automatic voice detection is disabled, the client must send activity - /// signals. - let automaticActivityDetection: AutomaticActivityDetection? - - /// Defines what effect activity has. - let activityHandling: ActivityHandling? - - /// Defines which input is included in the user's turn. - let turnCoverage: TurnCoverage? -} diff --git a/FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift b/FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift deleted file mode 100644 index ef0e1fda073..00000000000 --- a/FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift +++ /dev/null @@ -1,33 +0,0 @@ -// Copyright 2025 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. - -import Foundation - -/// Start of speech sensitivity. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -public struct StartSensitivity: EncodableProtoEnum, Hashable, Sendable { - enum Kind: String { - case high = "START_SENSITIVITY_HIGH" - case low = "START_SENSITIVITY_LOW" - } - - /// Automatic detection will detect the start of speech more often. - public static let high = StartSensitivity(kind: .high) - - /// Automatic detection will detect the start of speech less often. - public static let low = StartSensitivity(kind: .low) - - /// Returns the raw string representation of the `StartSensitivity` value. - public let rawValue: String -} diff --git a/FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift b/FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift deleted file mode 100644 index 5d69fee78ce..00000000000 --- a/FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift +++ /dev/null @@ -1,36 +0,0 @@ -// Copyright 2025 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import Foundation - -/// Options about which input is included in the user's turn. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -public struct TurnCoverage: EncodableProtoEnum, Hashable, Sendable { - enum Kind: String { - case onlyActivity = "TURN_INCLUDES_ONLY_ACTIVITY" - case allInput = "TURN_INCLUDES_ALL_INPUT" - } - - /// The users turn only includes activity since the last turn, excluding - /// inactivity (e.g. silence on the audio stream). - public static let onlyActivity = TurnCoverage(kind: .onlyActivity) - - /// The users turn includes all realtime input since the last turn, including - /// inactivity (e.g. silence on the audio stream). This is the default - // behavior. - public static let allInput = TurnCoverage(kind: .allInput) - - /// Returns the raw string representation of the `TurnCoverage` value. - public let rawValue: String -} From a9f496109518ebe5bacc9135890dfd1e98799737 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 13:31:05 -0500 Subject: [PATCH 40/98] Use unfair lock for closeError --- .../Types/Internal/Live/AsyncWebSocket.swift | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index 77a3f83f345..980e84f86a2 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -13,6 +13,7 @@ // limitations under the License. import Foundation +private import FirebaseCoreInternal final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDelegate { private let webSocketTask: URLSessionWebSocketTask @@ -20,39 +21,34 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe private let continuation: AsyncThrowingStream.Continuation private var continuationFinished = false private let continuationLock = NSLock() - - private var _closeError: WebSocketClosedError? 
= nil - private let closeErrorLock = NSLock() - private(set) var closeError: WebSocketClosedError? { - get { closeErrorLock.withLock { _closeError } } - set { closeErrorLock.withLock { _closeError = newValue } } - } + private var closeError: UnfairLock init(urlSession: URLSession = GenAIURLSession.default, urlRequest: URLRequest) { webSocketTask = urlSession.webSocketTask(with: urlRequest) (stream, continuation) = AsyncThrowingStream .makeStream() + closeError = UnfairLock(nil) } deinit { - webSocketTask.cancel(with: .goingAway, reason: nil) + disconnect() } func connect() -> AsyncThrowingStream { webSocketTask.resume() - closeError = nil + closeError.withLock { $0 = nil } startReceiving() return stream } func disconnect() { - if closeError != nil { return } + if closeError.value() != nil { return } close(code: .goingAway, reason: nil) } func send(_ message: URLSessionWebSocketTask.Message) async throws { - if let closeError { + if let closeError = closeError.value() { throw closeError } try await webSocketTask.send(message) @@ -60,7 +56,7 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe private func startReceiving() { Task { - while !Task.isCancelled && self.webSocketTask.isOpen && self.closeError == nil { + while !Task.isCancelled && self.webSocketTask.isOpen && self.closeError.value() == nil { do { let message = try await webSocketTask.receive() continuation.yield(message) @@ -73,7 +69,9 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe private func close(code: URLSessionWebSocketTask.CloseCode, reason: Data?) 
{ let error = WebSocketClosedError(closeCode: code, closeReason: reason) - closeError = error + closeError.withLock { + $0 = error + } webSocketTask.cancel(with: code, reason: reason) From e216c19604e7da7b7665cfa4c4876c683c09a430 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 13:34:49 -0500 Subject: [PATCH 41/98] Use unfair lock for continuation finished --- .../Sources/Types/Internal/Live/AsyncWebSocket.swift | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index 980e84f86a2..009bc07dcfc 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -19,8 +19,7 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe private let webSocketTask: URLSessionWebSocketTask private let stream: AsyncThrowingStream private let continuation: AsyncThrowingStream.Continuation - private var continuationFinished = false - private let continuationLock = NSLock() + private let continuationFinished = UnfairLock(false) private var closeError: UnfairLock init(urlSession: URLSession = GenAIURLSession.default, urlRequest: URLRequest) { @@ -75,10 +74,10 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe webSocketTask.cancel(with: code, reason: reason) - continuationLock.withLock { - guard !continuationFinished else { return } + continuationFinished.withLock { isFinished in + guard !isFinished else { return } self.continuation.finish(throwing: error) - self.continuationFinished = true + isFinished = true } } From 1f545da7921ddef044f3453085edd06f024df625 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 13:35:06 -0500 Subject: [PATCH 42/98] Make closeErorr immutable --- FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) 
diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index 009bc07dcfc..1468dcd2eea 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -20,7 +20,7 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe private let stream: AsyncThrowingStream private let continuation: AsyncThrowingStream.Continuation private let continuationFinished = UnfairLock(false) - private var closeError: UnfairLock + private let closeError: UnfairLock init(urlSession: URLSession = GenAIURLSession.default, urlRequest: URLRequest) { webSocketTask = urlSession.webSocketTask(with: urlRequest) From 6553b5a00a5e5ca62c43f9616a669d72a040e2c1 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 13:46:48 -0500 Subject: [PATCH 43/98] Update some doc formatting --- .../Types/Public/Live/LiveServerGoAway.swift | 1 + .../Types/Public/Live/LiveServerToolCall.swift | 3 +-- .../Live/LiveServerToolCallCancellation.swift | 3 +-- .../Types/Public/Live/LiveSession.swift | 18 +++++++----------- 4 files changed, 10 insertions(+), 15 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift index 1f3950a4869..9f35595ec96 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift @@ -21,6 +21,7 @@ import Foundation public struct LiveServerGoAway: Sendable { let goAway: GoAway /// The remaining time before the connection will be terminated as ABORTED. + /// /// The minimal time returned here is specified differently together with /// the rate limits for a given model. public var timeLeft: TimeInterval? 
{ goAway.timeLeft?.timeInterval } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift index bba0947e868..e365b39a517 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift @@ -15,8 +15,7 @@ /// Request for the client to execute the provided ``functionCalls``. /// /// The client should return matching ``FunctionResponsePart``, where the `id` fields correspond to -/// individual -/// ``FunctionCallPart``s. +/// individual ``FunctionCallPart``s. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct LiveServerToolCall: Sendable { let serverToolCall: BidiGenerateContentToolCall diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift index 25641c410ba..0fb42ab41c7 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift @@ -20,8 +20,7 @@ public struct LiveServerToolCallCancellation: Sendable { let serverToolCallCancellation: BidiGenerateContentToolCallCancellation /// A list of `id`s matching the `id` provided in a previous ``LiveServerToolCall``, where only - /// the provided `id`s should - /// be cancelled. + /// the provided `id`s should be cancelled. public var ids: [String]? 
{ serverToolCallCancellation.ids } init(_ serverToolCallCancellation: BidiGenerateContentToolCallCancellation) { diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 728217ad597..73ba0e803e8 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -38,9 +38,8 @@ public final class LiveSession: Sendable { /// /// - Parameters: /// - responses: Client generated function results, matched to their respective - /// ``FunctionCallPart`` by the `id` field. + /// ``FunctionCallPart`` by the `id` field. public func functionResponses(_ responses: [FunctionResponsePart]) async { - // TODO: what happens if you send an empty list lol let message = BidiGenerateContentToolResponse( functionResponses: responses.map { $0.functionResponse } ) @@ -55,7 +54,7 @@ public final class LiveSession: Sendable { /// /// - Parameters: /// - audio: Raw 16-bit PCM audio at 16Hz, used to update the model on the client's - /// conversation. + /// conversation. public func sendAudioRealtime(audio: Data) async { // TODO: (b/443984790) address when we add RealtimeInputConfig support let message = BidiGenerateContentRealtimeInput( @@ -97,9 +96,8 @@ public final class LiveSession: Sendable { /// - Parameters: /// - content: Content to append to the current conversation with the model. /// - turnComplete: Whether the server should start generating content with the currently - /// accumulated prompt, or await - /// additional messages before starting generation. By default, the server will await additional - /// messages. + /// accumulated prompt, or await additional messages before starting generation. By default, + /// the server will await additional messages. public func sendContent(_ content: [ModelContent], turnComplete: Bool? 
= nil) async { let message = BidiGenerateContentClientContent(turns: content, turnComplete: turnComplete) await service.send(.clientContent(message)) @@ -116,12 +114,10 @@ public final class LiveSession: Sendable { /// /// - Parameters: /// - content: Content to append to the current conversation with the model (see - /// ``PartsRepresentable`` for - /// conforming types). + /// ``PartsRepresentable`` for conforming types). /// - turnComplete: Whether the server should start generating content with the currently - /// accumulated prompt, or await - /// additional messages before starting generation. By default, the server will await additional - /// messages. + /// accumulated prompt, or await additional messages before starting generation. By default, + /// the server will await additional messages. public func sendContent(_ parts: any PartsRepresentable..., turnComplete: Bool? = nil) async { await sendContent([ModelContent(parts: parts)], turnComplete: turnComplete) From 8b32ed47a3a71870e0d4b3fd98c5868b52fc0ab2 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 13:48:25 -0500 Subject: [PATCH 44/98] Fix duration compile error --- FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 328d644b557..11f752dad39 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -226,7 +226,7 @@ actor LiveSessionService { // TODO: (b/444045023) When auto session resumption is enabled, call `connect` again AILog.debug( code: .liveSessionGoingAwaySoon, - "Session expires in: \(message.goAway.timeLeft ?? 0)" + "Session expires in: \(message.goAway.timeLeft?.timeInterval ?? 
0)" ) } From 6678be2a0c9221d77d05ff432de28dc4d546114e Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 14:06:48 -0500 Subject: [PATCH 45/98] Add docs to AsyncWebSocket --- .../Types/Internal/Live/AsyncWebSocket.swift | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index 1468dcd2eea..036a0bcd7c5 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -15,6 +15,12 @@ import Foundation private import FirebaseCoreInternal +/// Async API for interacting with web sockets. +/// +/// Internally, this just wraps around a `URLSessionWebSocketTask`, and provides a more async friendly +/// interface for sending and consuming data from it. +/// +/// Also surfaces a more fine-grained ``WebSocketClosedError`` for when the web socket is closed. final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDelegate { private let webSocketTask: URLSessionWebSocketTask private let stream: AsyncThrowingStream @@ -33,6 +39,7 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe disconnect() } + /// Starts a connection to the backend, returning a stream for the websocket responses. func connect() -> AsyncThrowingStream { webSocketTask.resume() closeError.withLock { $0 = nil } @@ -40,12 +47,16 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe return stream } + /// Closes the websocket, if it's not already closed. func disconnect() { if closeError.value() != nil { return } close(code: .goingAway, reason: nil) } + /// Sends a message to the server, through the websocket. + /// + /// If the web socket is closed, this method will throw the error it was closed with. 
func send(_ message: URLSessionWebSocketTask.Message) async throws { if let closeError = closeError.value() { throw closeError @@ -95,6 +106,10 @@ private extension URLSessionWebSocketTask { } } +/// The websocket was closed. +/// +/// See the `closeReason` for why, or the `errorCode` for the corresponding +/// `URLSessionWebSocketTask.CloseCode`. struct WebSocketClosedError: Error, Sendable, CustomNSError { let closeCode: URLSessionWebSocketTask.CloseCode let closeReason: String From 58aab918ddec0283d658550530e0813ae080606d Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 14:08:11 -0500 Subject: [PATCH 46/98] Remove realtime input config --- .../Types/Internal/Live/BidiGenerateContentSetup.swift | 5 ----- 1 file changed, 5 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index 7a0ff6ba4f5..1a2e999b8b0 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -45,9 +45,6 @@ struct BidiGenerateContentSetup: Encodable { let toolConfig: ToolConfig? - /// Configures the handling of realtime input. - let realtimeInputConfig: RealtimeInputConfig? - /// Input transcription. The transcription is independent to the model turn /// which means it doesn't imply any ordering between transcription and model /// turn. @@ -63,7 +60,6 @@ struct BidiGenerateContentSetup: Encodable { systemInstruction: ModelContent? = nil, tools: [Tool]? = nil, toolConfig: ToolConfig? = nil, - realtimeInputConfig: RealtimeInputConfig? = nil, inputAudioTranscription: BidiAudioTranscriptionConfig? = nil, outputAudioTranscription: BidiAudioTranscriptionConfig? 
= nil) { self.model = model @@ -71,7 +67,6 @@ struct BidiGenerateContentSetup: Encodable { self.systemInstruction = systemInstruction self.tools = tools self.toolConfig = toolConfig - self.realtimeInputConfig = realtimeInputConfig self.inputAudioTranscription = inputAudioTranscription self.outputAudioTranscription = outputAudioTranscription } From 4c742d5b91ec0ae35eae922b40441247c2af1b22 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 16:09:00 -0500 Subject: [PATCH 47/98] Add an error for lost connection --- .../Types/Internal/Live/AsyncWebSocket.swift | 30 ++++++++++++++++--- .../Internal/Live/LiveSessionService.swift | 28 +++++++++++++++-- 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index 036a0bcd7c5..5f413d212d3 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -71,14 +71,30 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe let message = try await webSocketTask.receive() continuation.yield(message) } catch { - close(code: webSocketTask.closeCode, reason: webSocketTask.closeReason) + if let error = webSocketTask.error as? NSError { + close( + code: webSocketTask.closeCode, + reason: webSocketTask.closeReason, + underlyingError: error + ) + } else { + close(code: webSocketTask.closeCode, reason: webSocketTask.closeReason) + } } } } } - private func close(code: URLSessionWebSocketTask.CloseCode, reason: Data?) { - let error = WebSocketClosedError(closeCode: code, closeReason: reason) + private func close( + code: URLSessionWebSocketTask.CloseCode, + reason: Data?, + underlyingError: Error? 
= nil + ) { + let error = WebSocketClosedError( + closeCode: code, + closeReason: reason, + underlyingError: underlyingError + ) closeError.withLock { $0 = error } @@ -110,14 +126,19 @@ private extension URLSessionWebSocketTask { /// /// See the `closeReason` for why, or the `errorCode` for the corresponding /// `URLSessionWebSocketTask.CloseCode`. +/// +/// In some cases, the `NSUnderlyingErrorKey` key may be populated with an +/// error for additional context. struct WebSocketClosedError: Error, Sendable, CustomNSError { let closeCode: URLSessionWebSocketTask.CloseCode let closeReason: String + let underlyingError: Error? - init(closeCode: URLSessionWebSocketTask.CloseCode, closeReason: Data?) { + init(closeCode: URLSessionWebSocketTask.CloseCode, closeReason: Data?, underlyingError: Error? = nil) { self.closeCode = closeCode self.closeReason = closeReason .flatMap { String(data: $0, encoding: .utf8) } ?? "Unknown reason." + self.underlyingError = underlyingError } var errorCode: Int { closeCode.rawValue } @@ -125,6 +146,7 @@ struct WebSocketClosedError: Error, Sendable, CustomNSError { var errorUserInfo: [String: Any] { [ NSLocalizedDescriptionKey: "WebSocket closed with code \(closeCode.rawValue). Reason: \(closeReason)", + NSUnderlyingErrorKey: underlyingError as Any, ] } } diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 11f752dad39..cf193e55500 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -244,9 +244,14 @@ actor LiveSessionService { if let error = error as? WebSocketClosedError { // only raise an error if the session didn't close normally (ie; the user calling close) if error.closeCode != .goingAway { - let error = LiveSessionUnexpectedClosureError(underlyingError: error) + let closureError: Error + if let error = error.underlyingError as? 
NSError, error.domain == NSURLErrorDomain, error.code == NSURLErrorNetworkConnectionLost { + closureError = LiveSessionLostConnectionError(underlyingError: error) + } else { + closureError = LiveSessionUnexpectedClosureError(underlyingError: error) + } close() - responseContinuation.finish(throwing: error) + responseContinuation.finish(throwing: closureError) } } else { // an error occurred outside the websocket, so it's likely not closed @@ -365,10 +370,27 @@ public struct LiveSessionUnsupportedMessageError: Error, Sendable, CustomNSError } } +/// The live session was closed, because the network connection was lost. +/// +/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +public struct LiveSessionLostConnectionError: Error, Sendable, CustomNSError { + let underlyingError: Error + + init(underlyingError: Error) { + self.underlyingError = underlyingError + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "The live session lost connection to the server. Cause: \(underlyingError.localizedDescription)", + NSUnderlyingErrorKey: underlyingError, + ] + } +} + /// The live session was closed, but not for a reason the SDK expected. /// /// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. -// TODO: two common causes I can think of are api limits and network issues. I wonder if we can catch these somehow, as they seem common enough to surface as actual errors. 
public struct LiveSessionUnexpectedClosureError: Error, Sendable, CustomNSError { let underlyingError: WebSocketClosedError From 2cd1544f7d715b86701f3e61daaf19e7f7c2a6c2 Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 16:09:51 -0500 Subject: [PATCH 48/98] Formatting --- .../Types/Internal/Live/AsyncWebSocket.swift | 15 +++++++-------- .../Types/Internal/Live/LiveSessionService.swift | 3 ++- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index 5f413d212d3..4d569fdd462 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -17,8 +17,8 @@ private import FirebaseCoreInternal /// Async API for interacting with web sockets. /// -/// Internally, this just wraps around a `URLSessionWebSocketTask`, and provides a more async friendly -/// interface for sending and consuming data from it. +/// Internally, this just wraps around a `URLSessionWebSocketTask`, and provides a more async +/// friendly interface for sending and consuming data from it. /// /// Also surfaces a more fine-grained ``WebSocketClosedError`` for when the web socket is closed. final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDelegate { @@ -85,11 +85,9 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe } } - private func close( - code: URLSessionWebSocketTask.CloseCode, - reason: Data?, - underlyingError: Error? = nil - ) { + private func close(code: URLSessionWebSocketTask.CloseCode, + reason: Data?, + underlyingError: Error? = nil) { let error = WebSocketClosedError( closeCode: code, closeReason: reason, @@ -134,7 +132,8 @@ struct WebSocketClosedError: Error, Sendable, CustomNSError { let closeReason: String let underlyingError: Error? 
- init(closeCode: URLSessionWebSocketTask.CloseCode, closeReason: Data?, underlyingError: Error? = nil) { + init(closeCode: URLSessionWebSocketTask.CloseCode, closeReason: Data?, + underlyingError: Error? = nil) { self.closeCode = closeCode self.closeReason = closeReason .flatMap { String(data: $0, encoding: .utf8) } ?? "Unknown reason." diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index cf193e55500..4281159df6a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -245,7 +245,8 @@ actor LiveSessionService { // only raise an error if the session didn't close normally (ie; the user calling close) if error.closeCode != .goingAway { let closureError: Error - if let error = error.underlyingError as? NSError, error.domain == NSURLErrorDomain, error.code == NSURLErrorNetworkConnectionLost { + if let error = error.underlyingError as? NSError, error.domain == NSURLErrorDomain, + error.code == NSURLErrorNetworkConnectionLost { closureError = LiveSessionLostConnectionError(underlyingError: error) } else { closureError = LiveSessionUnexpectedClosureError(underlyingError: error) From 4fce70842aa3219265416fb0e1335318c97bd11f Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 16:13:37 -0500 Subject: [PATCH 49/98] Remove function behavior stuff --- FirebaseAI/Sources/Tool.swift | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/FirebaseAI/Sources/Tool.swift b/FirebaseAI/Sources/Tool.swift index 3ec91d0572a..e051b3b5ea4 100644 --- a/FirebaseAI/Sources/Tool.swift +++ b/FirebaseAI/Sources/Tool.swift @@ -29,9 +29,6 @@ public struct FunctionDeclaration: Sendable { /// Describes the parameters to this function; must be of type `DataType.object`. let parameters: Schema? - // TODO: remove (added for testing) - let behavior: FunctionBehavior? 
- /// Constructs a new `FunctionDeclaration`. /// /// - Parameters: @@ -43,21 +40,8 @@ public struct FunctionDeclaration: Sendable { /// calls; by default, all parameters are considered required. public init(name: String, description: String, parameters: [String: Schema], optionalParameters: [String] = []) { - self.init( - name: name, - description: description, - parameters: parameters, - optionalParameters: optionalParameters, - functionBehavior: nil - ) - } - - // TODO: remove (added for testing) - public init(name: String, description: String, parameters: [String: Schema], - optionalParameters: [String] = [], functionBehavior: FunctionBehavior? = nil) { self.name = name self.description = description - behavior = functionBehavior self.parameters = Schema.object( properties: parameters, optionalProperties: optionalParameters, @@ -66,12 +50,6 @@ public struct FunctionDeclaration: Sendable { } } -// TODO: remove (added for testing) -public enum FunctionBehavior: String, Sendable, Encodable { - case blocking = "BLOCKING" - case nonBlocking = "NON_BLOCKING" -} - /// A tool that allows the generative model to connect to Google Search to access and incorporate /// up-to-date information from the web into its responses. 
/// @@ -237,7 +215,6 @@ extension FunctionDeclaration: Encodable { case name case description case parameters - case behavior // TODO: remove (added for testing) } public func encode(to encoder: Encoder) throws { @@ -245,7 +222,6 @@ extension FunctionDeclaration: Encodable { try container.encode(name, forKey: .name) try container.encode(description, forKey: .description) try container.encode(parameters, forKey: .parameters) - try container.encode(behavior, forKey: .behavior) // TODO: remove (added for testing) } } From dd81a84d51235f82f517364cb0001cb688f1706d Mon Sep 17 00:00:00 2001 From: Daymon Date: Fri, 26 Sep 2025 16:25:45 -0500 Subject: [PATCH 50/98] Revert analytics changes from testing --- FirebaseAnalytics.podspec | 2 +- Package.swift | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/FirebaseAnalytics.podspec b/FirebaseAnalytics.podspec index 2445e7942f5..ddfd6bfefcf 100644 --- a/FirebaseAnalytics.podspec +++ b/FirebaseAnalytics.podspec @@ -13,7 +13,7 @@ Pod::Spec.new do |s| s.authors = 'Google, Inc.' 
s.source = { - :http => 'https://dl.google.com/firebase/ios/analytics/20f7f19c421351ed/FirebaseAnalytics-12.2.0.tar.gz' + :http => 'https://dl.google.com/firebase/ios/analytics/7f774173bfc50ea8/FirebaseAnalytics-12.3.0.tar.gz' } s.cocoapods_version = '>= 1.12.0' diff --git a/Package.swift b/Package.swift index 86ca75b1545..3bcbca83686 100644 --- a/Package.swift +++ b/Package.swift @@ -329,8 +329,8 @@ let package = Package( ), .binaryTarget( name: "FirebaseAnalytics", - url: "https://dl.google.com/firebase/ios/swiftpm/12.2.0/FirebaseAnalytics.zip", - checksum: "f1b07dabcdf3f2b6c495af72baa55e40672a625b8a1b6c631fb43ec74a2ec1ca" + url: "https://dl.google.com/firebase/ios/swiftpm/12.3.0/FirebaseAnalytics.zip", + checksum: "a7fcb34227d6cc0b2db9b1d3f9dd844801e5a28217f20f1daae6c3d2b7d1e8e1" ), .testTarget( name: "AnalyticsSwiftUnit", @@ -1392,7 +1392,7 @@ func googleAppMeasurementDependency() -> Package.Dependency { return .package(url: appMeasurementURL, branch: "main") } - return .package(url: appMeasurementURL, exact: "12.2.0") + return .package(url: appMeasurementURL, exact: "12.3.0") } func abseilDependency() -> Package.Dependency { From a574eb4e0873355bdc581400878819d22ee23f61 Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 29 Sep 2025 12:12:03 -0500 Subject: [PATCH 51/98] Fix id decoding --- FirebaseAI/Sources/Types/Internal/InternalPart.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/InternalPart.swift b/FirebaseAI/Sources/Types/Internal/InternalPart.swift index 44abee6baeb..a9d5a2eb810 100644 --- a/FirebaseAI/Sources/Types/Internal/InternalPart.swift +++ b/FirebaseAI/Sources/Types/Internal/InternalPart.swift @@ -139,7 +139,7 @@ extension FunctionCall: Codable { } else { args = JSONObject() } - id = try container.decode(String.self, forKey: .id) + id = try container.decodeIfPresent(String.self, forKey: .id) } } From 712b8b4bdb64382ad5b97669f1d346356cb7b559 Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 29 
Sep 2025 12:13:23 -0500 Subject: [PATCH 52/98] Rename id to functionId to avoid identifiable collision --- FirebaseAI/Sources/Types/Public/Part.swift | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Part.swift b/FirebaseAI/Sources/Types/Public/Part.swift index bcfa6d42148..8acf7b12e9a 100644 --- a/FirebaseAI/Sources/Types/Public/Part.swift +++ b/FirebaseAI/Sources/Types/Public/Part.swift @@ -147,9 +147,10 @@ public struct FunctionCallPart: Part { public var isThought: Bool { _isThought ?? false } - /// Unique id of the function call. If present, the returned ``FunctionResponsePart`` - /// should have a matching `id` field. - public var id: String? { functionCall.id } + /// Unique id of the function call. + /// + /// If present, the returned ``FunctionResponsePart`` should have a matching `functionId` field. + public var functionId: String? { functionCall.id } /// Constructs a new function call part. /// @@ -196,7 +197,7 @@ public struct FunctionResponsePart: Part { let thoughtSignature: String? /// Matching `id` for a ``FunctionCallPart``, if one was provided. - public var id: String? { functionResponse.id } + public var functionId: String? { functionResponse.id } /// The name of the function that was called. public var name: String { functionResponse.name } @@ -222,10 +223,10 @@ public struct FunctionResponsePart: Part { /// - Parameters: /// - name: The name of the function that was called. /// - response: The function's response. - /// - id: Matching `id` for a ``FunctionCallPart``, if one was provided. - public init(name: String, response: JSONObject, id: String? = nil) { + /// - functionId: Matching `functionId` for a ``FunctionCallPart``, if one was provided. + public init(name: String, response: JSONObject, functionId: String? 
= nil) { self.init( - FunctionResponse(name: name, response: response, id: id), + FunctionResponse(name: name, response: response, id: functionId), isThought: nil, thoughtSignature: nil ) From 6ec278b8d48ed060e6395a0d410393564edc9848 Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 29 Sep 2025 12:15:37 -0500 Subject: [PATCH 53/98] Fix available version to match usage --- FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 73ba0e803e8..d0134cc29e4 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -21,7 +21,7 @@ import Foundation /// through the incremental API (such as ``sendContent(_:turnComplete:)``). /// /// To create an instance of this class, see ``LiveGenerativeModel``. -@available(macOS 12.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public final class LiveSession: Sendable { private let service: LiveSessionService From ad587399af114519152468887f1fffe6100680ae Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 29 Sep 2025 12:16:16 -0500 Subject: [PATCH 54/98] Update reference to id field in docs --- FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index d0134cc29e4..c4dbb15884f 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -38,7 +38,7 @@ public final class LiveSession: Sendable { /// /// - Parameters: /// - responses: Client generated function results, matched to their respective - /// ``FunctionCallPart`` by the `id` field. + /// ``FunctionCallPart`` by the `functionId` field. 
public func functionResponses(_ responses: [FunctionResponsePart]) async { let message = BidiGenerateContentToolResponse( functionResponses: responses.map { $0.functionResponse } From 0f4c55ba0543401fd0c394ffa477409713eeb1e8 Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 29 Sep 2025 12:17:29 -0500 Subject: [PATCH 55/98] Update other doc references to id field --- FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift | 2 +- .../Types/Public/Live/LiveServerToolCallCancellation.swift | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift index e365b39a517..237ca65eaa3 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift @@ -14,7 +14,7 @@ /// Request for the client to execute the provided ``functionCalls``. /// -/// The client should return matching ``FunctionResponsePart``, where the `id` fields correspond to +/// The client should return matching ``FunctionResponsePart``, where the `functionId` fields correspond to /// individual ``FunctionCallPart``s. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct LiveServerToolCall: Sendable { diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift index 0fb42ab41c7..85d49531284 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift @@ -19,8 +19,8 @@ @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct LiveServerToolCallCancellation: Sendable { let serverToolCallCancellation: BidiGenerateContentToolCallCancellation - /// A list of `id`s matching the `id` provided in a previous ``LiveServerToolCall``, where only - /// the provided `id`s should be cancelled. + /// A list of `functionId`s matching the `functionId` provided in a previous ``LiveServerToolCall``, where only + /// the provided `functionId`s should be cancelled. public var ids: [String]? { serverToolCallCancellation.ids } init(_ serverToolCallCancellation: BidiGenerateContentToolCallCancellation) { From 6855efc8907656905de3e50fc9365659292d88d0 Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 29 Sep 2025 12:18:09 -0500 Subject: [PATCH 56/98] formatting --- FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift | 4 ++-- .../Types/Public/Live/LiveServerToolCallCancellation.swift | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift index 237ca65eaa3..2ec031bab49 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift @@ -14,8 +14,8 @@ /// Request for the client to execute the provided ``functionCalls``. 
/// -/// The client should return matching ``FunctionResponsePart``, where the `functionId` fields correspond to -/// individual ``FunctionCallPart``s. +/// The client should return matching ``FunctionResponsePart``, where the `functionId` fields +/// correspond to individual ``FunctionCallPart``s. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct LiveServerToolCall: Sendable { let serverToolCall: BidiGenerateContentToolCall diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift index 85d49531284..3732b27ad2a 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift @@ -19,8 +19,8 @@ @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct LiveServerToolCallCancellation: Sendable { let serverToolCallCancellation: BidiGenerateContentToolCallCancellation - /// A list of `functionId`s matching the `functionId` provided in a previous ``LiveServerToolCall``, where only - /// the provided `functionId`s should be cancelled. + /// A list of `functionId`s matching the `functionId` provided in a previous + /// ``LiveServerToolCall``, where only the provided `functionId`s should be cancelled. public var ids: [String]? 
{ serverToolCallCancellation.ids } init(_ serverToolCallCancellation: BidiGenerateContentToolCallCancellation) { From d1d878ab3ae3cba680857882070b193ba8c4ef63 Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 29 Sep 2025 14:00:46 -0500 Subject: [PATCH 57/98] Fix old available version --- FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 4281159df6a..53e83ebf836 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -28,7 +28,7 @@ import Foundation /// /// This mainly comes into play when we don't want to block developers from sending messages while a /// session is being reloaded. -@available(macOS 12.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) actor LiveSessionService { public let responses: AsyncThrowingStream private let responseContinuation: AsyncThrowingStream From e3fb8e8ca3132dd55d3cebfe978593c91de68429 Mon Sep 17 00:00:00 2001 From: Daymon Date: Mon, 29 Sep 2025 15:00:03 -0500 Subject: [PATCH 58/98] Possibly fix concurrency issue --- FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 53e83ebf836..bd3eb1dcc43 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -293,7 +293,7 @@ actor LiveSessionService { /// Creates a websocket pointing to the backend. /// /// Will apply the required app check and auth headers, as the backend expects them. 
- private func createWebsocket() async throws -> AsyncWebSocket { + private nonisolated func createWebsocket() async throws -> AsyncWebSocket { let urlString = switch apiConfig.service { case .vertexAI: "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/us-central1" From 866b32e5478610672252f476629116aa08e56a48 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 11:40:26 -0500 Subject: [PATCH 59/98] Allow params to be unnamed in realtime apis --- FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index c4dbb15884f..75a42962993 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -55,7 +55,7 @@ public final class LiveSession: Sendable { /// - Parameters: /// - audio: Raw 16-bit PCM audio at 16Hz, used to update the model on the client's /// conversation. - public func sendAudioRealtime(audio: Data) async { + public func sendAudioRealtime(_ audio: Data) async { // TODO: (b/443984790) address when we add RealtimeInputConfig support let message = BidiGenerateContentRealtimeInput( audio: InlineData(data: audio, mimeType: "audio/pcm") @@ -68,7 +68,7 @@ public final class LiveSession: Sendable { /// - Parameters: /// - video: Encoded video data, used to update the model on the client's conversation. /// - format: The format that the video was encoded in (eg; `mp4`, `webm`, `wmv`, etc.,). 
- public func sendVideoRealtime(video: Data, format: String) async { + public func sendVideoRealtime(_ video: Data, format: String) async { let message = BidiGenerateContentRealtimeInput( video: InlineData(data: video, mimeType: "video/\(format)") ) @@ -79,7 +79,7 @@ public final class LiveSession: Sendable { /// /// - Parameters: /// - text: Text content to append to the current client's conversation. - public func sendTextRealtime(text: String) async { + public func sendTextRealtime(_ text: String) async { let message = BidiGenerateContentRealtimeInput(text: text) await service.send(.realtimeInput(message)) } From 3b896bd5ff91afccc15f9b95a525038528d9351b Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 13:27:02 -0500 Subject: [PATCH 60/98] Add audio modifier to transcription names --- FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index ad232456368..e0ca756929d 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -64,7 +64,7 @@ public struct LiveServerContent: Sendable { /// /// This field is only populated when an ``AudioTranscriptionConfig`` is provided to /// ``LiveGenerationConfig``. - public var inputTranscription: LiveTranscription? { + public var inputAudioTranscription: LiveTranscription? { serverContent.inputTranscription.map { LiveTranscription($0) } } @@ -75,7 +75,7 @@ public struct LiveServerContent: Sendable { /// /// > Important: Transcripts are independent to the model turn. This means transcripts may /// > come earlier or later than when the model sends the corresponding audio responses. - public var outputTranscription: LiveTranscription? { + public var outputAudioTranscription: LiveTranscription? 
{ serverContent.outputTranscription.map { LiveTranscription($0) } } From 34f427583d30687c205bcd098b8b26f4e671a23c Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:45:31 -0500 Subject: [PATCH 61/98] Update FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift Co-authored-by: Nick Cooke <36927374+ncooke3@users.noreply.github.com> --- FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index bd3eb1dcc43..1b3a976ae48 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -30,7 +30,7 @@ import Foundation /// session is being reloaded. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) actor LiveSessionService { - public let responses: AsyncThrowingStream + let responses: AsyncThrowingStream private let responseContinuation: AsyncThrowingStream .Continuation From 30d9896d62f692b09dec466b2c22353b759dd319 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:45:40 -0500 Subject: [PATCH 62/98] Update FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift Co-authored-by: Nick Cooke <36927374+ncooke3@users.noreply.github.com> --- FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 1b3a976ae48..6695bc15561 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -105,7 +105,7 @@ actor LiveSessionService { /// If there's any 
issues while sending the message, details about the issue will be logged. /// /// Since messages are queued syncronously, they are sent in-order. - public func send(_ message: BidiGenerateContentClientMessage) { + func send(_ message: BidiGenerateContentClientMessage) { messageQueueContinuation.yield(message) } From 082ccbdb844828c18a922e0d8a1a8c0de3a29d50 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:45:48 -0500 Subject: [PATCH 63/98] Update FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift Co-authored-by: Nick Cooke <36927374+ncooke3@users.noreply.github.com> --- FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 6695bc15561..384ad001b2e 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -113,7 +113,7 @@ actor LiveSessionService { /// /// Seperated into its own function to make it easier to surface a way to call it seperately when /// resuming the same session. 
- public func connect() { + func connect() { setupTask.cancel() setupTask = Task { [weak self] in try await withCheckedThrowingContinuation { setupContinuation in From 956942ee95944476b2bffce77c583e359cde05c5 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:45:56 -0500 Subject: [PATCH 64/98] Update FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift Co-authored-by: Nick Cooke <36927374+ncooke3@users.noreply.github.com> --- FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 384ad001b2e..4459a95e9df 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -104,7 +104,7 @@ actor LiveSessionService { /// /// If there's any issues while sending the message, details about the issue will be logged. /// - /// Since messages are queued syncronously, they are sent in-order. + /// Since messages are queued synchronously, they are sent in-order. 
func send(_ message: BidiGenerateContentClientMessage) { messageQueueContinuation.yield(message) } From 319ebc2ca476e0cc907f20112fba0ff4271321b8 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 14:46:10 -0500 Subject: [PATCH 65/98] Update FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift Co-authored-by: Nick Cooke <36927374+ncooke3@users.noreply.github.com> --- FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 4459a95e9df..796ea0ae314 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -128,7 +128,7 @@ actor LiveSessionService { /// Cancel any running tasks and close the websocket. /// /// This method is idempotent; if it's already ran once, it will effectively be a no-op. - public func close() { + func close() { setupTask.cancel() responsesTask?.cancel() messageQueueTask?.cancel() From 01f18f23b5c53b2bd4f357280f5e58c087afee78 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 14:50:53 -0500 Subject: [PATCH 66/98] Add comments explaining nested task --- .../Sources/Types/Internal/Live/LiveSessionService.swift | 3 +++ 1 file changed, 3 insertions(+) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 796ea0ae314..6cc984b9a0a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -115,8 +115,11 @@ actor LiveSessionService { /// resuming the same session. 
func connect() { setupTask.cancel() + // we launch the setup task in a seperate task to avoid blocking the parent context setupTask = Task { [weak self] in + // we need a continuation to surface that the setup is complete, while still allowing us to listen to the server try await withCheckedThrowingContinuation { setupContinuation in + // nested task so we can use await Task { [weak self] in guard let self else { return } await self.listenToServer(setupContinuation) From 029ee0d54929a2a77ef196d3a8977baba41b9940 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 14:54:06 -0500 Subject: [PATCH 67/98] Use close instead on connect --- .../Sources/Types/Internal/Live/LiveSessionService.swift | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 6cc984b9a0a..a865ec83cf7 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -114,7 +114,7 @@ actor LiveSessionService { /// Seperated into its own function to make it easier to surface a way to call it seperately when /// resuming the same session. func connect() { - setupTask.cancel() + close() // we launch the setup task in a seperate task to avoid blocking the parent context setupTask = Task { [weak self] in // we need a continuation to surface that the setup is complete, while still allowing us to listen to the server @@ -148,11 +148,6 @@ actor LiveSessionService { /// /// Will also close out the old websocket and the previous long running tasks. 
private func listenToServer(_ setupComplete: CheckedContinuation) async { - // close out the existing connections, if any - webSocket?.disconnect() - responsesTask?.cancel() - messageQueueTask?.cancel() - do { webSocket = try await createWebsocket() } catch { From 88fa653a4eaed93cafe6a87d3d859dbe98d58a7b Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 14:58:11 -0500 Subject: [PATCH 68/98] Move live session error to public folder --- .../Internal/Live/LiveSessionService.swift | 80 ---------------- .../Types/Public/Live/LiveSessionErrors.swift | 95 +++++++++++++++++++ 2 files changed, 95 insertions(+), 80 deletions(-) create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index a865ec83cf7..a2fdc858a6c 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -346,83 +346,3 @@ actor LiveSessionService { return AsyncWebSocket(urlSession: urlSession, urlRequest: urlRequest) } } - -/// The live model sent a message that the SDK failed to parse. -/// -/// This may indicate that the SDK version needs updating, a model is too old for the current SDK -/// version, or that the model is just -/// not supported. -/// -/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. -public struct LiveSessionUnsupportedMessageError: Error, Sendable, CustomNSError { - let underlyingError: Error - - init(underlyingError: Error) { - self.underlyingError = underlyingError - } - - public var errorUserInfo: [String: Any] { - [ - NSLocalizedDescriptionKey: "Failed to parse a live message from the model. Cause: \(underlyingError.localizedDescription)", - NSUnderlyingErrorKey: underlyingError, - ] - } -} - -/// The live session was closed, because the network connection was lost. 
-/// -/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. -public struct LiveSessionLostConnectionError: Error, Sendable, CustomNSError { - let underlyingError: Error - - init(underlyingError: Error) { - self.underlyingError = underlyingError - } - - public var errorUserInfo: [String: Any] { - [ - NSLocalizedDescriptionKey: "The live session lost connection to the server. Cause: \(underlyingError.localizedDescription)", - NSUnderlyingErrorKey: underlyingError, - ] - } -} - -/// The live session was closed, but not for a reason the SDK expected. -/// -/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. -public struct LiveSessionUnexpectedClosureError: Error, Sendable, CustomNSError { - let underlyingError: WebSocketClosedError - - init(underlyingError: WebSocketClosedError) { - self.underlyingError = underlyingError - } - - public var errorUserInfo: [String: Any] { - [ - NSLocalizedDescriptionKey: "The live session was closed for some unexpected reason. Cause: \(underlyingError.localizedDescription)", - NSUnderlyingErrorKey: underlyingError, - ] - } -} - -/// The live model refused our request to setup a live session. -/// -/// This can occur due to the model not supporting the requested response modalities, the project -/// not having access to the model, -/// the model being invalid, or some internal error. -/// -/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. -public struct LiveSessionSetupError: Error, Sendable, CustomNSError { - let underlyingError: Error - - init(underlyingError: Error) { - self.underlyingError = underlyingError - } - - public var errorUserInfo: [String: Any] { - [ - NSLocalizedDescriptionKey: "The model did not accept the live session request. 
Reason: \(underlyingError.localizedDescription)", - NSUnderlyingErrorKey: underlyingError, - ] - } -} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift new file mode 100644 index 00000000000..7e71f71ef8f --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift @@ -0,0 +1,95 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// The live model sent a message that the SDK failed to parse. +/// +/// This may indicate that the SDK version needs updating, a model is too old for the current SDK +/// version, or that the model is just +/// not supported. +/// +/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +public struct LiveSessionUnsupportedMessageError: Error, Sendable, CustomNSError { + let underlyingError: Error + + init(underlyingError: Error) { + self.underlyingError = underlyingError + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "Failed to parse a live message from the model. Cause: \(underlyingError.localizedDescription)", + NSUnderlyingErrorKey: underlyingError, + ] + } +} + +/// The live session was closed, because the network connection was lost. +/// +/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. 
+public struct LiveSessionLostConnectionError: Error, Sendable, CustomNSError { + let underlyingError: Error + + init(underlyingError: Error) { + self.underlyingError = underlyingError + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "The live session lost connection to the server. Cause: \(underlyingError.localizedDescription)", + NSUnderlyingErrorKey: underlyingError, + ] + } +} + +/// The live session was closed, but not for a reason the SDK expected. +/// +/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +public struct LiveSessionUnexpectedClosureError: Error, Sendable, CustomNSError { + let underlyingError: WebSocketClosedError + + init(underlyingError: WebSocketClosedError) { + self.underlyingError = underlyingError + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "The live session was closed for some unexpected reason. Cause: \(underlyingError.localizedDescription)", + NSUnderlyingErrorKey: underlyingError, + ] + } +} + +/// The live model refused our request to setup a live session. +/// +/// This can occur due to the model not supporting the requested response modalities, the project +/// not having access to the model, +/// the model being invalid, or some internal error. +/// +/// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +public struct LiveSessionSetupError: Error, Sendable, CustomNSError { + let underlyingError: Error + + init(underlyingError: Error) { + self.underlyingError = underlyingError + } + + public var errorUserInfo: [String: Any] { + [ + NSLocalizedDescriptionKey: "The model did not accept the live session request. 
Reason: \(underlyingError.localizedDescription)", + NSUnderlyingErrorKey: underlyingError, + ] + } +} From 633200918bdf521ce9c66b1138692a5ed4ef8270 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 15:04:44 -0500 Subject: [PATCH 69/98] Use failable initializers --- .../Internal/Live/LiveSessionService.swift | 2 +- .../Types/Public/Live/LiveServerMessage.swift | 21 ++++++++++--------- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index a2fdc858a6c..4155234811b 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -219,7 +219,7 @@ actor LiveSessionService { resumed = true setupComplete.resume() } - } else if let liveMessage = LiveServerMessage.tryFrom(response) { + } else if let liveMessage = LiveServerMessage(from: response) { if case let .goAway(message) = liveMessage.messageType { // TODO: (b/444045023) When auto session resumption is enabled, call `connect` again AILog.debug( diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift index 50cef0c1e6d..d0f15b7d386 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift @@ -44,30 +44,31 @@ public struct LiveServerMessage: Sendable { @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) extension LiveServerMessage { - static func tryFrom(_ serverMessage: BidiGenerateContentServerMessage) -> Self? 
{ - guard let messageType = LiveServerMessage.MessageType.tryFrom(serverMessage.messageType) else { + init?(from serverMessage: BidiGenerateContentServerMessage) { + guard let messageType = LiveServerMessage.MessageType(from: serverMessage.messageType) else { return nil } - return LiveServerMessage(serverMessage: serverMessage, messageType: messageType) + self.serverMessage = serverMessage + self.messageType = messageType } } @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) extension LiveServerMessage.MessageType { - static func tryFrom(_ serverMessage: BidiGenerateContentServerMessage.MessageType) -> Self? { - return switch serverMessage { + init?(from serverMessage: BidiGenerateContentServerMessage.MessageType) { + switch serverMessage { case .setupComplete: // this is handled internally, and should not be surfaced to users - nil + return nil case let .serverContent(msg): - .content(LiveServerContent(msg)) + self = .content(LiveServerContent(msg)) case let .toolCall(msg): - .toolCall(LiveServerToolCall(msg)) + self = .toolCall(LiveServerToolCall(msg)) case let .toolCallCancellation(msg): - .toolCallCancellation(LiveServerToolCallCancellation(msg)) + self = .toolCallCancellation(LiveServerToolCallCancellation(msg)) case let .goAway(msg): - .goAway(LiveServerGoAway(msg)) + self = .goAway(LiveServerGoAway(msg)) } } } From bba76c6a0b062dfdef33e06ec557f6ebf4e6b4ed Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 15:04:51 -0500 Subject: [PATCH 70/98] Formatting on comment --- .../Sources/Types/Internal/Live/LiveSessionService.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index 4155234811b..e15d8d2d262 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -117,7 +117,8 @@ actor 
LiveSessionService { close() // we launch the setup task in a seperate task to avoid blocking the parent context setupTask = Task { [weak self] in - // we need a continuation to surface that the setup is complete, while still allowing us to listen to the server + // we need a continuation to surface that the setup is complete, while still allowing us to + // listen to the server try await withCheckedThrowingContinuation { setupContinuation in // nested task so we can use await Task { [weak self] in From 7dacdc2fdc95357009714c5b4bfc86b745a93ed3 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:07:35 -0500 Subject: [PATCH 71/98] Update FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift Co-authored-by: Nick Cooke <36927374+ncooke3@users.noreply.github.com> --- FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index e0ca756929d..aede2ea2cc9 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -55,7 +55,7 @@ public struct LiveServerContent: Sendable { /// /// Note that if the model was ``interrupted``, this will not be set. The model will go from /// ``interrupted`` -> ``turnComplete``. - public var generationComplete: Bool? { serverContent.generationComplete } + public var isGenerationComplete: Bool? { serverContent.generationComplete } /// Metadata specifing the sources used to ground generated content. public var groundingMetadata: GroundingMetadata? 
{ serverContent.groundingMetadata } From e8e6370ecd808cfc541b80adc1175d407e8a6bc2 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:07:43 -0500 Subject: [PATCH 72/98] Update FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift Co-authored-by: Nick Cooke <36927374+ncooke3@users.noreply.github.com> --- FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index aede2ea2cc9..a4f40dc29ba 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -40,7 +40,7 @@ public struct LiveServerContent: Sendable { /// /// If the client is playing out the content in realtime, this is a /// good signal to stop and empty the current queue. - public var interrupted: Bool? { serverContent.interrupted } + public var wasInterrupted: Bool? { serverContent.interrupted } /// The model has finished _generating_ data for the current turn. /// From 57e2d408c4e4c51e3adb4ab9fb9935007ac506c5 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 15:10:05 -0500 Subject: [PATCH 73/98] Update docs for boolean value changes --- .../Types/Public/Live/LiveServerContent.swift | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index a4f40dc29ba..2359808ba14 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -45,16 +45,13 @@ public struct LiveServerContent: Sendable { /// The model has finished _generating_ data for the current turn. 
/// /// For realtime playback, there will be a delay between when the model finishes generating - /// content - /// and the client has finished playing back the generated content. ``generationComplete`` - /// indicates - /// that the model is done generating data, while ``turnComplete`` indicates the model is waiting - /// for - /// additional client messages. Sending a message during this delay may cause an ``interrupted`` - /// message to be sent. + /// content and the client has finished playing back the generated content. ``generationComplete`` + /// indicates that the model is done generating data, while ``turnComplete`` indicates the model + /// is waiting for additional client messages. Sending a message during this delay may cause a + /// ``wasInterrupted`` message to be sent. /// - /// Note that if the model was ``interrupted``, this will not be set. The model will go from - /// ``interrupted`` -> ``turnComplete``. + /// Note that if the model ``wasInterrupted``, this will not be set. The model will go from + /// ``wasInterrupted`` -> ``turnComplete``. public var isGenerationComplete: Bool? { serverContent.generationComplete } /// Metadata specifing the sources used to ground generated content. 
From ed06c34aa78f0739d2aed432a01c54ec47b84031 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:13:57 -0500 Subject: [PATCH 74/98] Update FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift Co-authored-by: Nick Cooke <36927374+ncooke3@users.noreply.github.com> --- FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index 2359808ba14..078f6daf297 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -34,7 +34,7 @@ public struct LiveServerContent: Sendable { /// /// Can be set alongside ``content``, indicating that the ``content`` is /// the last in the turn. - public var turnComplete: Bool? { serverContent.turnComplete } + public var isTurnComplete: Bool? { serverContent.turnComplete } /// The model was interrupted by a client message while generating data. /// From 68472c4135fbcb4c7e2f37c099a96c7a63da0b57 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 15:14:36 -0500 Subject: [PATCH 75/98] Update docs for turn complete --- FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index 078f6daf297..1e85ed8a85d 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -46,7 +46,7 @@ public struct LiveServerContent: Sendable { /// /// For realtime playback, there will be a delay between when the model finishes generating /// content and the client has finished playing back the generated content. 
``generationComplete`` - /// indicates that the model is done generating data, while ``turnComplete`` indicates the model + /// indicates that the model is done generating data, while ``isTurnComplete`` indicates the model /// is waiting for additional client messages. Sending a message during this delay may cause a /// ``wasInterrupted`` message to be sent. /// From 70c9f197e81ea4ca31525d4fb1be88baf6ec532a Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:14:59 -0500 Subject: [PATCH 76/98] Update FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift Co-authored-by: Andrew Heard --- FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index 4d569fdd462..09248755ee2 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -49,7 +49,7 @@ final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDe /// Closes the websocket, if it's not already closed.
func disconnect() { - if closeError.value() != nil { return } + guard closeError.value() == nil else { return } close(code: .goingAway, reason: nil) } From d4872794ed2ad2dd77fc0334b52b469eacf1e80d Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:15:15 -0500 Subject: [PATCH 77/98] Update FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift Co-authored-by: Andrew Heard --- .../Types/Internal/Live/BidiGenerateContentClientContent.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift index a24944d83fd..459aa258cc3 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift @@ -19,7 +19,8 @@ import Foundation /// history and used as part of the prompt to the model to generate content. /// /// A message here will interrupt any current model generation. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentClientContent: Encodable { /// The content appended to the current conversation with the model. 
/// From 474c64632d4a291c09f83e42800c31feac7ac700 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:15:25 -0500 Subject: [PATCH 78/98] Update FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift Co-authored-by: Andrew Heard --- .../Types/Internal/Live/BidiGenerateContentClientMessage.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift index d4e47982af1..1d530dc00c0 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -15,7 +15,8 @@ import Foundation /// Messages sent by the client in the BidiGenerateContent RPC call. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) enum BidiGenerateContentClientMessage { /// Message to be sent in the first and only first client message. 
case setup(BidiGenerateContentSetup) From 29dfd73ec05ff8e0be3f9ab035ae53f3df2a50c9 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:16:41 -0500 Subject: [PATCH 79/98] Update FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift Co-authored-by: Andrew Heard --- .../BidiGenerateContentClientMessage.swift | 26 +++---------------- 1 file changed, 3 insertions(+), 23 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift index 1d530dc00c0..25b8869e137 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -31,26 +31,6 @@ enum BidiGenerateContentClientMessage { case toolResponse(BidiGenerateContentToolResponse) } -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) -extension BidiGenerateContentClientMessage: Encodable { - enum CodingKeys: CodingKey { - case setup - case clientContent - case realtimeInput - case toolResponse - } - - func encode(to encoder: any Encoder) throws { - var container = encoder.container(keyedBy: CodingKeys.self) - switch self { - case let .setup(setup): - try container.encode(setup, forKey: .setup) - case let .clientContent(clientContent): - try container.encode(clientContent, forKey: .clientContent) - case let .realtimeInput(realtimeInput): - try container.encode(realtimeInput, forKey: .realtimeInput) - case let .toolResponse(toolResponse): - try container.encode(toolResponse, forKey: .toolResponse) - } - } -} +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) +extension BidiGenerateContentClientMessage: Encodable {} From a7f5028b16c66bbb743a0cb1e92fac1e7e488926 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> 
Date: Tue, 30 Sep 2025 15:16:56 -0500 Subject: [PATCH 80/98] Update FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift Co-authored-by: Andrew Heard --- FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift index 7e71f71ef8f..c7f2fa4aeea 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift @@ -57,6 +57,8 @@ public struct LiveSessionLostConnectionError: Error, Sendable, CustomNSError { /// The live session was closed, but not for a reason the SDK expected. /// /// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveSessionUnexpectedClosureError: Error, Sendable, CustomNSError { let underlyingError: WebSocketClosedError From 2429404809d1665e9de3fa6052d2ec1011441f5c Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:17:03 -0500 Subject: [PATCH 81/98] Update FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift Co-authored-by: Andrew Heard --- FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift index c7f2fa4aeea..16d4fbc9d81 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift @@ -81,6 +81,8 @@ public struct LiveSessionUnexpectedClosureError: Error, Sendable, CustomNSError /// the model being invalid, or some internal error. 
/// /// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveSessionSetupError: Error, Sendable, CustomNSError { let underlyingError: Error From 0939793cb3072622e2308f75e5ee3a8166ca2ad4 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:17:10 -0500 Subject: [PATCH 82/98] Update FirebaseAI/Sources/Types/Public/Live/LiveTranscription.swift Co-authored-by: Andrew Heard --- FirebaseAI/Sources/Types/Public/Live/LiveTranscription.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveTranscription.swift b/FirebaseAI/Sources/Types/Public/Live/LiveTranscription.swift index 72dec9d21f7..1ab455d4b9d 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveTranscription.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveTranscription.swift @@ -13,7 +13,8 @@ // limitations under the License. /// Text transcription of some audio form during a live interaction with the model. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveTranscription: Sendable { let transcript: BidiGenerateContentTranscription /// Text representing the model's interpretation of what the audio said. 
From 407ee640dbe1bafb8d1ab1f01897ab62d34f16e3 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 15:17:20 -0500 Subject: [PATCH 83/98] Update FirebaseAI/Sources/Types/Public/Live/SpeechConfig.swift Co-authored-by: Andrew Heard --- FirebaseAI/Sources/Types/Public/Live/SpeechConfig.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/SpeechConfig.swift b/FirebaseAI/Sources/Types/Public/Live/SpeechConfig.swift index 6e4497b1d36..67f4799f6e4 100644 --- a/FirebaseAI/Sources/Types/Public/Live/SpeechConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/SpeechConfig.swift @@ -15,7 +15,8 @@ import Foundation /// Configuration for controlling the voice of the model during conversation. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct SpeechConfig: Sendable { let speechConfig: BidiSpeechConfig From fe885aaf1416aea03a2a5a62d6cb78bbf751fe9e Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 15:21:04 -0500 Subject: [PATCH 84/98] Update available versions to exclude watchos --- .../Internal/Live/BidiGenerateContentRealtimeInput.swift | 3 ++- .../Internal/Live/BidiGenerateContentServerContent.swift | 3 ++- .../Internal/Live/BidiGenerateContentServerMessage.swift | 6 ++++-- .../Types/Internal/Live/BidiGenerateContentSetup.swift | 3 ++- .../Internal/Live/BidiGenerateContentSetupComplete.swift | 3 ++- .../Internal/Live/BidiGenerateContentToolCall.swift | 3 ++- .../Live/BidiGenerateContentToolCallCancellation.swift | 3 ++- .../Internal/Live/BidiGenerateContentToolResponse.swift | 3 ++- .../Internal/Live/BidiGenerateContentTranscription.swift | 3 ++- .../Types/Internal/Live/BidiGenerationConfig.swift | 3 ++- .../Sources/Types/Internal/Live/BidiSpeechConfig.swift | 2 ++ FirebaseAI/Sources/Types/Internal/Live/GoAway.swift | 3 
++- .../Sources/Types/Internal/Live/LiveSessionService.swift | 3 ++- FirebaseAI/Sources/Types/Internal/Live/VoiceConfig.swift | 8 ++++++++ .../Types/Public/Live/AudioTranscriptionConfig.swift | 3 ++- .../Sources/Types/Public/Live/LiveGenerationConfig.swift | 3 ++- .../Sources/Types/Public/Live/LiveGenerativeModel.swift | 3 ++- .../Sources/Types/Public/Live/LiveServerContent.swift | 3 ++- .../Sources/Types/Public/Live/LiveServerGoAway.swift | 3 ++- .../Sources/Types/Public/Live/LiveServerMessage.swift | 9 ++++++--- .../Sources/Types/Public/Live/LiveServerToolCall.swift | 3 ++- .../Public/Live/LiveServerToolCallCancellation.swift | 3 ++- FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 3 ++- .../Sources/Types/Public/Live/LiveSessionErrors.swift | 4 ++++ 24 files changed, 62 insertions(+), 24 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift index 7566228282e..2c0c977bafa 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift @@ -28,7 +28,8 @@ import Foundation /// to optimize for a fast start of the response from the model. /// - Is always assumed to be the user's input (cannot be used to populate /// conversation history). -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentRealtimeInput: Encodable { /// These form the realtime audio input stream. let audio: InlineData? 
diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift index 98a4b8c42e9..648d7a09ed8 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift @@ -19,7 +19,8 @@ import Foundation /// /// Content is generated as quickly as possible, and not in realtime. Clients /// may choose to buffer and play it out in realtime. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentServerContent: Decodable, Sendable { /// The content that the model has generated as part of the current /// conversation with the user. diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 0147ecf34dd..bcf58b5b73d 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,7 +15,8 @@ import Foundation /// Response message for BidiGenerateContent RPC call. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentServerMessage: Sendable { /// The type of the message. 
enum MessageType: Sendable { @@ -47,7 +48,8 @@ struct BidiGenerateContentServerMessage: Sendable { // MARK: - Decodable -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) extension BidiGenerateContentServerMessage: Decodable { enum CodingKeys: String, CodingKey { case setupComplete diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index 1a2e999b8b0..4f5fff00d07 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -20,7 +20,8 @@ import Foundation /// /// Clients should wait for a `BidiGenerateContentSetupComplete` message before /// sending any additional messages. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentSetup: Encodable { /// The fully qualified name of the publisher model. /// diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift index acf5a0572fe..54449782060 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift @@ -15,5 +15,6 @@ import Foundation /// Sent in response to a `BidiGenerateContentSetup` message from the client. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentSetupComplete: Decodable, Sendable {} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift index 18ea0336500..4c34e6367e9 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift @@ -16,7 +16,8 @@ import Foundation /// Request for the client to execute the `function_calls` and return the /// responses with the matching `id`s. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentToolCall: Decodable, Sendable { /// The function call to be executed. let functionCalls: [FunctionCall]? diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift index 5cd3d616c15..48bc991c1fa 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift @@ -19,7 +19,8 @@ import Foundation /// cancelled. If there were side-effects to those tool calls, clients may /// attempt to undo the tool calls. This message occurs only in cases where the /// clients interrupt server turns. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentToolCallCancellation: Decodable, Sendable { /// The ids of the tool calls to be cancelled. 
let ids: [String]? diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift index 8b4e4ba48b2..c9d2506895b 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift @@ -22,7 +22,8 @@ import Foundation /// calling happens by exchanging the `Content` parts, while in the bidi /// GenerateContent APIs function calling happens over these dedicated set of /// messages. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentToolResponse: Encodable { /// The response to the function calls. let functionResponses: [FunctionResponse]? diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift index 4c5cb965b2a..652799edf9d 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentTranscription.swift @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerateContentTranscription: Decodable, Sendable { let text: String? 
} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift index 5226e2ec79a..a3a3e8a9f99 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerationConfig.swift @@ -15,7 +15,8 @@ import Foundation /// Configuration options for live content generation. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiGenerationConfig: Encodable, Sendable { let temperature: Float? let topP: Float? diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiSpeechConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiSpeechConfig.swift index be27b499c31..80e7d341ef7 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiSpeechConfig.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiSpeechConfig.swift @@ -15,6 +15,8 @@ import Foundation /// Speech generation config. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiSpeechConfig: Encodable, Sendable { /// The configuration for the speaker to use. let voiceConfig: VoiceConfig diff --git a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift index 6fa046e7e61..f5c858b8b45 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift @@ -15,7 +15,8 @@ import Foundation /// Server will not be able to service client soon. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct GoAway: Decodable, Sendable { /// The remaining time before the connection will be terminated as ABORTED. 
/// The minimal time returned here is specified differently together with diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index e15d8d2d262..c166bb5efc2 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -28,7 +28,8 @@ import Foundation /// /// This mainly comes into play when we don't want to block developers from sending messages while a /// session is being reloaded. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) actor LiveSessionService { let responses: AsyncThrowingStream private let responseContinuation: AsyncThrowingStream diff --git a/FirebaseAI/Sources/Types/Internal/Live/VoiceConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/VoiceConfig.swift index 98d17a2a906..0e6790c03f2 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/VoiceConfig.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/VoiceConfig.swift @@ -15,6 +15,8 @@ import Foundation /// Configuration for the speaker to use. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) enum VoiceConfig { /// Configuration for the prebuilt voice to use. case prebuiltVoiceConfig(PrebuiltVoiceConfig) @@ -27,6 +29,8 @@ enum VoiceConfig { /// /// Not just a string on the parent proto, because there'll likely be a lot /// more options here. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct PrebuiltVoiceConfig: Encodable, Sendable { /// The name of the preset voice to use. let voiceName: String @@ -37,6 +41,8 @@ struct PrebuiltVoiceConfig: Encodable, Sendable { } /// The configuration for the custom voice to use. 
+@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct CustomVoiceConfig: Encodable, Sendable { /// The sample of the custom voice, in pcm16 s16e format. let customVoiceSample: Data @@ -48,6 +54,8 @@ struct CustomVoiceConfig: Encodable, Sendable { // MARK: - Encodable conformance +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) extension VoiceConfig: Encodable { enum CodingKeys: CodingKey { case prebuiltVoiceConfig diff --git a/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift b/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift index 7058e6d6091..aaba70471f9 100644 --- a/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift @@ -17,7 +17,8 @@ /// While there are not currently any options, this will likely change in the future. For now, just /// providing an instance of this struct will enable audio transcriptions for the corresponding /// input or output fields. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct AudioTranscriptionConfig: Sendable { let audioTranscriptionConfig: BidiAudioTranscriptionConfig diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift index a1e5398a044..12e88047c6d 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift @@ -15,7 +15,8 @@ import Foundation /// Configuration options for live content generation. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveGenerationConfig: Sendable { let bidiGenerationConfig: BidiGenerationConfig let inputAudioTranscription: BidiAudioTranscriptionConfig? diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift index d53fb58f13f..cb244b83094 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift @@ -18,7 +18,8 @@ import Foundation /// various input types, supporting bidirectional streaming. /// /// You can create a new session via ``connect()``. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public final class LiveGenerativeModel { let modelResourceName: String let firebaseInfo: FirebaseInfo diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index 1e85ed8a85d..d37294d31c1 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -17,7 +17,8 @@ /// /// Content is generated as quickly as possible, and not in realtime. Clients /// may choose to buffer and play it out in realtime. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveServerContent: Sendable { let serverContent: BidiGenerateContentServerContent diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift index 9f35595ec96..77e712e7f88 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift @@ -17,7 +17,8 @@ import Foundation /// Server will not be able to service client soon. /// /// To learn more about session limits, see the docs on [Maximum session duration](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live#maximum-session-duration)\. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveServerGoAway: Sendable { let goAway: GoAway /// The remaining time before the connection will be terminated as ABORTED. diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift index d0f15b7d386..d1c879383a7 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift @@ -13,7 +13,8 @@ // limitations under the License. /// Update from the server, generated from the model in response to client messages. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveServerMessage: Sendable { let serverMessage: BidiGenerateContentServerMessage @@ -42,7 +43,8 @@ public struct LiveServerMessage: Sendable { // MARK: - Internal parsing -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) extension LiveServerMessage { init?(from serverMessage: BidiGenerateContentServerMessage) { guard let messageType = LiveServerMessage.MessageType(from: serverMessage.messageType) else { @@ -54,7 +56,8 @@ extension LiveServerMessage { } } -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) extension LiveServerMessage.MessageType { init?(from serverMessage: BidiGenerateContentServerMessage.MessageType) { switch serverMessage { diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift index 2ec031bab49..dc19c90b89b 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift @@ -16,7 +16,8 @@ /// /// The client should return matching ``FunctionResponsePart``, where the `functionId` fields /// correspond to individual ``FunctionCallPart``s. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveServerToolCall: Sendable { let serverToolCall: BidiGenerateContentToolCall diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift index 3732b27ad2a..ca7973c64b7 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift @@ -16,7 +16,8 @@ /// /// The client does not need to send ``FunctionResponsePart``s for the cancelled /// ``FunctionCallPart``s. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveServerToolCallCancellation: Sendable { let serverToolCallCancellation: BidiGenerateContentToolCallCancellation /// A list of `functionId`s matching the `functionId` provided in a previous diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 75a42962993..88e54514c8d 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -21,7 +21,8 @@ import Foundation /// through the incremental API (such as ``sendContent(_:turnComplete:)``). /// /// To create an instance of this class, see ``LiveGenerativeModel``. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public final class LiveSession: Sendable { private let service: LiveSessionService diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift index 16d4fbc9d81..b57a47f6e07 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift @@ -21,6 +21,8 @@ import Foundation /// not supported. /// /// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveSessionUnsupportedMessageError: Error, Sendable, CustomNSError { let underlyingError: Error @@ -39,6 +41,8 @@ public struct LiveSessionUnsupportedMessageError: Error, Sendable, CustomNSError /// The live session was closed, because the network connection was lost. /// /// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) public struct LiveSessionLostConnectionError: Error, Sendable, CustomNSError { let underlyingError: Error From 5977fe1750a074c05d12c645846d548f21726227 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 15:30:53 -0500 Subject: [PATCH 85/98] Add docs for audio response modality. 
--- FirebaseAI/Sources/Types/Public/ResponseModality.swift | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/FirebaseAI/Sources/Types/Public/ResponseModality.swift b/FirebaseAI/Sources/Types/Public/ResponseModality.swift index 743093d7c90..36080cd0409 100644 --- a/FirebaseAI/Sources/Types/Public/ResponseModality.swift +++ b/FirebaseAI/Sources/Types/Public/ResponseModality.swift @@ -49,6 +49,16 @@ public struct ResponseModality: EncodableProtoEnum, Sendable { /// > backwards-incompatible ways. public static let image = ResponseModality(kind: .image) + /// **Public Preview**: Specifies that the model should generate audio content. + /// + /// Use this modality when you need the model to produce (spoken) audio responses based on the + /// provided input or prompts. + /// + /// > Warning: This is **only** supported via the [live api](``LiveGenerativeModel``) currently. + /// > + /// > Furthermore, bidirectional streaming using Live models is in Public Preview, which means + /// > that the feature is not subject to any SLA or deprecation policy and could change in + /// > backwards-incompatible ways. public static let audio = ResponseModality(kind: .audio) let rawValue: String From 6a6b2bee38a0186319ace172d23b6ea31a03fd07 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 15:34:54 -0500 Subject: [PATCH 86/98] Link to refdocs instead --- FirebaseAI/Sources/Types/Public/ResponseModality.swift | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Public/ResponseModality.swift b/FirebaseAI/Sources/Types/Public/ResponseModality.swift index 36080cd0409..7d58eda99ea 100644 --- a/FirebaseAI/Sources/Types/Public/ResponseModality.swift +++ b/FirebaseAI/Sources/Types/Public/ResponseModality.swift @@ -54,7 +54,8 @@ public struct ResponseModality: EncodableProtoEnum, Sendable { /// Use this modality when you need the model to produce (spoken) audio responses based on the /// provided input or prompts. 
/// - /// > Warning: This is **only** supported via the [live api](``LiveGenerativeModel``) currently. + /// > Warning: This is **only** supported via the [live api](https://firebase.google.com/docs/ai-logic/live-api) + /// > currently. /// > /// > Furthermore, bidirectional streaming using Live models is in Public Preview, which means /// > that the feature is not subject to any SLA or deprecation policy and could change in From 9970582662a143005d9e023f6e035a7ffc26c7dc Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 15:36:02 -0500 Subject: [PATCH 87/98] Slight rewording + fix formatting on docs --- FirebaseAI/Sources/Types/Public/ResponseModality.swift | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/ResponseModality.swift b/FirebaseAI/Sources/Types/Public/ResponseModality.swift index 7d58eda99ea..bd3e56d8e8b 100644 --- a/FirebaseAI/Sources/Types/Public/ResponseModality.swift +++ b/FirebaseAI/Sources/Types/Public/ResponseModality.swift @@ -54,8 +54,8 @@ public struct ResponseModality: EncodableProtoEnum, Sendable { /// Use this modality when you need the model to produce (spoken) audio responses based on the /// provided input or prompts. /// - /// > Warning: This is **only** supported via the [live api](https://firebase.google.com/docs/ai-logic/live-api) - /// > currently. + /// > Warning: This is currently **only** supported via the + /// > [live api](https://firebase.google.com/docs/ai-logic/live-api)\. 
/// > /// > Furthermore, bidirectional streaming using Live models is in Public Preview, which means /// > that the feature is not subject to any SLA or deprecation policy and could change in From 0ada63ef8a89a3f6a6281c8c5deec72c9d8c33e0 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 15:44:47 -0500 Subject: [PATCH 88/98] Add back encodable for client message --- .../BidiGenerateContentClientMessage.swift | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift index 25b8869e137..19529b99b03 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -33,4 +33,26 @@ enum BidiGenerateContentClientMessage { @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) @available(watchOS, unavailable) -extension BidiGenerateContentClientMessage: Encodable {} +extension BidiGenerateContentClientMessage: Encodable { + enum CodingKeys: CodingKey { + case setup + case clientContent + case realtimeInput + case toolResponse + } + + func encode(to encoder: any Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + switch self { + case let .setup(setup): + try container.encode(setup, forKey: .setup) + case let .clientContent(clientContent): + try container.encode(clientContent, forKey: .clientContent) + case let .realtimeInput(realtimeInput): + try container.encode(realtimeInput, forKey: .realtimeInput) + case let .toolResponse(toolResponse): + try container.encode(toolResponse, forKey: .toolResponse) + } + } +} + From 814b4d401312009e7130d9aae182fb287433e329 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 16:05:52 -0500 Subject: [PATCH 89/98] Renaming GoAway to GoingAwayNotice for public api --- 
.../Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- ...LiveServerGoAway.swift => LiveServerGoingAwayNotice.swift} | 2 +- FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) rename FirebaseAI/Sources/Types/Public/Live/{LiveServerGoAway.swift => LiveServerGoingAwayNotice.swift} (96%) diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index c166bb5efc2..ce1428d542a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -222,7 +222,7 @@ actor LiveSessionService { setupComplete.resume() } } else if let liveMessage = LiveServerMessage(from: response) { - if case let .goAway(message) = liveMessage.messageType { + if case let .goingAwayNotice(message) = liveMessage.messageType { // TODO: (b/444045023) When auto session resumption is enabled, call `connect` again AILog.debug( code: .liveSessionGoingAwaySoon, diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoingAwayNotice.swift similarity index 96% rename from FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift rename to FirebaseAI/Sources/Types/Public/Live/LiveServerGoingAwayNotice.swift index 77e712e7f88..981ddf0c251 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoAway.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoingAwayNotice.swift @@ -19,7 +19,7 @@ import Foundation /// To learn more about session limits, see the docs on [Maximum session duration](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live#maximum-session-duration)\. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) @available(watchOS, unavailable) -public struct LiveServerGoAway: Sendable { +public struct LiveServerGoingAwayNotice: Sendable { let goAway: GoAway /// The remaining time before the connection will be terminated as ABORTED. /// diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift index d1c879383a7..e84380a66d4 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift @@ -31,7 +31,7 @@ public struct LiveServerMessage: Sendable { case toolCallCancellation(LiveServerToolCallCancellation) /// Server will disconnect soon. - case goAway(LiveServerGoAway) + case goingAwayNotice(LiveServerGoingAwayNotice) } /// The actual message sent from the server. @@ -71,7 +71,7 @@ extension LiveServerMessage.MessageType { case let .toolCallCancellation(msg): self = .toolCallCancellation(LiveServerToolCallCancellation(msg)) case let .goAway(msg): - self = .goAway(LiveServerGoAway(msg)) + self = .goingAwayNotice(LiveServerGoingAwayNotice(msg)) } } } From 84adb364805967633745721b357cc3e73ffb721c Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 16:06:24 -0500 Subject: [PATCH 90/98] lint --- .../Types/Internal/Live/BidiGenerateContentClientMessage.swift | 1 - 1 file changed, 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift index 19529b99b03..758d75e2cc7 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -55,4 +55,3 @@ extension BidiGenerateContentClientMessage: Encodable { } } } - From 166be933904724c46e361345843c50696c942459 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Tue, 30 Sep 2025 
17:24:47 -0400 Subject: [PATCH 91/98] Add workaround for `available(watchOS, unavailable)` being ignored --- .../Internal/Live/BidiGenerateContentServerMessage.swift | 4 ++-- .../Sources/Types/Internal/Live/LiveSessionService.swift | 2 +- FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift | 2 +- FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index bcf58b5b73d..ab7de34fba7 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,7 +15,7 @@ import Foundation /// Response message for BidiGenerateContent RPC call. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) @available(watchOS, unavailable) struct BidiGenerateContentServerMessage: Sendable { /// The type of the message. 
@@ -48,7 +48,7 @@ struct BidiGenerateContentServerMessage: Sendable { // MARK: - Decodable -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) @available(watchOS, unavailable) extension BidiGenerateContentServerMessage: Decodable { enum CodingKeys: String, CodingKey { diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index ce1428d542a..c27bf213acb 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -28,7 +28,7 @@ import Foundation /// /// This mainly comes into play when we don't want to block developers from sending messages while a /// session is being reloaded. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) @available(watchOS, unavailable) actor LiveSessionService { let responses: AsyncThrowingStream diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift index dc19c90b89b..7209e312c76 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift @@ -16,7 +16,7 @@ /// /// The client should return matching ``FunctionResponsePart``, where the `functionId` fields /// correspond to individual ``FunctionCallPart``s. 
-@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) @available(watchOS, unavailable) public struct LiveServerToolCall: Sendable { let serverToolCall: BidiGenerateContentToolCall diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 88e54514c8d..1e7d0b6c8b4 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -21,7 +21,7 @@ import Foundation /// through the incremental API (such as ``sendContent(_:turnComplete:)``). /// /// To create an instance of this class, see ``LiveGenerativeModel``. -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) @available(watchOS, unavailable) public final class LiveSession: Sendable { private let service: LiveSessionService From 9228d9ba34568466cc4da9c88e510a7d5b41fd36 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Tue, 30 Sep 2025 17:25:18 -0400 Subject: [PATCH 92/98] Add `available(watchOS, unavailable)` annotations to remaining types --- FirebaseAI/Sources/FirebaseAI.swift | 2 ++ FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift | 2 ++ .../Sources/Types/Internal/Live/BidiGenerateContentSetup.swift | 2 ++ 3 files changed, 6 insertions(+) diff --git a/FirebaseAI/Sources/FirebaseAI.swift b/FirebaseAI/Sources/FirebaseAI.swift index 8518f3327ac..a4cce602095 100644 --- a/FirebaseAI/Sources/FirebaseAI.swift +++ b/FirebaseAI/Sources/FirebaseAI.swift @@ -156,6 +156,8 @@ public final class FirebaseAI: Sendable { /// - systemInstruction: Instructions that direct the model to behave a certain way; currently /// only text content is supported. /// - requestOptions: Configuration parameters for sending requests to the backend. 
+ @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) + @available(watchOS, unavailable) public func liveModel(modelName: String, generationConfig: LiveGenerationConfig? = nil, tools: [Tool]? = nil, diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift index 09248755ee2..9392ccf8eba 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -21,6 +21,8 @@ private import FirebaseCoreInternal /// friendly interface for sending and consuming data from it. /// /// Also surfaces a more fine-grained ``WebSocketClosedError`` for when the web socket is closed. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDelegate { private let webSocketTask: URLSessionWebSocketTask private let stream: AsyncThrowingStream diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index 4f5fff00d07..15dc8889a0b 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -73,4 +73,6 @@ struct BidiGenerateContentSetup: Encodable { } } +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) +@available(watchOS, unavailable) struct BidiAudioTranscriptionConfig: Encodable {} From 140bfb84394e12c3c5aa244bf3bfa6c5ea7add77 Mon Sep 17 00:00:00 2001 From: Daymon <17409137+daymxn@users.noreply.github.com> Date: Tue, 30 Sep 2025 17:32:49 -0500 Subject: [PATCH 93/98] Update FirebaseAI/Sources/FirebaseAI.swift Co-authored-by: Nick Cooke <36927374+ncooke3@users.noreply.github.com> --- FirebaseAI/Sources/FirebaseAI.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/FirebaseAI/Sources/FirebaseAI.swift b/FirebaseAI/Sources/FirebaseAI.swift index a4cce602095..f28a8348895 100644 --- a/FirebaseAI/Sources/FirebaseAI.swift +++ b/FirebaseAI/Sources/FirebaseAI.swift @@ -146,7 +146,7 @@ public final class FirebaseAI: Sendable { /// > Important: Only Live models (typically containing `live-*` in the name) are supported. /// /// - Parameters: - /// - modelName: The name of the Livemodel to use, for example + /// - modelName: The name of the Live model to use, for example /// `"gemini-live-2.5-flash-preview"`; /// see [model versions](https://firebase.google.com/docs/ai-logic/live-api?api=dev#models-that-support-capability) /// for a list of supported Live models. From 5a1d8365334fe6bce1b3c510c9be9e12d03ebae9 Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 17:57:23 -0500 Subject: [PATCH 94/98] Fix doc links + default to false for nullable bools --- .../Types/Public/Live/LiveServerContent.swift | 20 +++++++++---------- .../Types/Public/Live/LiveSession.swift | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index d37294d31c1..bd4d35ff8df 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -25,7 +25,7 @@ public struct LiveServerContent: Sendable { /// The content that the model has generated as part of the current /// conversation with the user. /// - /// This can be ``null`` if the message signifies something else (such + /// This can be `nil` if the message signifies something else (such /// as the turn ending). public var modelTurn: ModelContent? { serverContent.modelTurn } @@ -33,27 +33,27 @@ public struct LiveServerContent: Sendable { /// /// Generation will only start in response to additional client messages. 
/// - /// Can be set alongside ``content``, indicating that the ``content`` is + /// Can be set alongside `content`, indicating that the `content` is /// the last in the turn. - public var isTurnComplete: Bool? { serverContent.turnComplete } + public var isTurnComplete: Bool { serverContent.turnComplete ?? false } /// The model was interrupted by a client message while generating data. /// /// If the client is playing out the content in realtime, this is a /// good signal to stop and empty the current queue. - public var wasInterrupted: Bool? { serverContent.interrupted } + public var wasInterrupted: Bool { serverContent.interrupted ?? false } /// The model has finished _generating_ data for the current turn. /// /// For realtime playback, there will be a delay between when the model finishes generating - /// content and the client has finished playing back the generated content. ``generationComplete`` - /// indicates that the model is done generating data, while ``isturnComplete`` indicates the model + /// content and the client has finished playing back the generated content. `generationComplete` + /// indicates that the model is done generating data, while `isturnComplete` indicates the model /// is waiting for additional client messages. Sending a message during this delay may cause a - /// ``wasInterrupted`` message to be sent. + /// `wasInterrupted` message to be sent. /// - /// Note that if the model ``wasInterrupted``, this will not be set. The model will go from - /// ``wasInterrupted`` -> ``turnComplete``. - public var isGenerationComplete: Bool? { serverContent.generationComplete } + /// Note that if the model `wasInterrupted`, this will not be set. The model will go from + /// `wasInterrupted` -> `turnComplete`. + public var isGenerationComplete: Bool { serverContent.generationComplete ?? false } /// Metadata specifing the sources used to ground generated content. public var groundingMetadata: GroundingMetadata? 
{ serverContent.groundingMetadata } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 88e54514c8d..d62fd834973 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -99,7 +99,7 @@ public final class LiveSession: Sendable { /// - turnComplete: Whether the server should start generating content with the currently /// accumulated prompt, or await additional messages before starting generation. By default, /// the server will await additional messages. - public func sendContent(_ content: [ModelContent], turnComplete: Bool? = nil) async { + public func sendContent(_ content: [ModelContent], turnComplete: Bool = false) async { let message = BidiGenerateContentClientContent(turns: content, turnComplete: turnComplete) await service.send(.clientContent(message)) } @@ -120,7 +120,7 @@ public final class LiveSession: Sendable { /// accumulated prompt, or await additional messages before starting generation. By default, /// the server will await additional messages. public func sendContent(_ parts: any PartsRepresentable..., - turnComplete: Bool? 
= nil) async { + turnComplete: Bool = false) async { await sendContent([ModelContent(parts: parts)], turnComplete: turnComplete) } From c5b1567e40040e2fe2b404c004462e863faeae8d Mon Sep 17 00:00:00 2001 From: Daymon Date: Tue, 30 Sep 2025 18:09:20 -0500 Subject: [PATCH 95/98] Rephrase live models in docs --- FirebaseAI/Sources/FirebaseAI.swift | 8 ++++---- .../Types/Public/Live/AudioTranscriptionConfig.swift | 2 +- .../Sources/Types/Public/Live/LiveSessionErrors.swift | 7 +++---- FirebaseAI/Sources/Types/Public/ResponseModality.swift | 2 +- 4 files changed, 9 insertions(+), 10 deletions(-) diff --git a/FirebaseAI/Sources/FirebaseAI.swift b/FirebaseAI/Sources/FirebaseAI.swift index f28a8348895..0947b79935b 100644 --- a/FirebaseAI/Sources/FirebaseAI.swift +++ b/FirebaseAI/Sources/FirebaseAI.swift @@ -139,17 +139,17 @@ public final class FirebaseAI: Sendable { /// **[Public Preview]** Initializes a ``LiveGenerativeModel`` with the given parameters. /// - /// > Warning: For Firebase AI SDK, bidirectional streaming using Live models is in Public + /// > Warning: For Firebase AI SDK, bidirectional streaming using Live API models is in Public /// Preview, which means that the feature is not subject to any SLA or deprecation policy and /// could change in backwards-incompatible ways. /// - /// > Important: Only Live models (typically containing `live-*` in the name) are supported. + /// > Important: Only Live API models (typically containing `live-*` in the name) are supported. /// /// - Parameters: - /// - modelName: The name of the Live model to use, for example + /// - modelName: The name of the Live API model to use, for example /// `"gemini-live-2.5-flash-preview"`; /// see [model versions](https://firebase.google.com/docs/ai-logic/live-api?api=dev#models-that-support-capability) - /// for a list of supported Live models. + /// for a list of supported Live API models. /// - generationConfig: The content generation parameters your model should use. 
/// - tools: A list of ``Tool`` objects that the model may use to generate the next response. /// - toolConfig: Tool configuration for any ``Tool`` specified in the request. diff --git a/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift b/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift index aaba70471f9..da808d5561d 100644 --- a/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift +++ b/FirebaseAI/Sources/Types/Public/Live/AudioTranscriptionConfig.swift @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -/// Configuration options for audio transcriptions when communicating with a live model. +/// Configuration options for audio transcriptions when communicating with a Live API model. /// /// While there are not currently any options, this will likely change in the future. For now, just /// providing an instance of this struct will enable audio transcriptions for the corresponding diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift index b57a47f6e07..fb5cd8952cd 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSessionErrors.swift @@ -14,7 +14,7 @@ import Foundation -/// The live model sent a message that the SDK failed to parse. +/// The Live API model sent a message that the SDK failed to parse. /// /// This may indicate that the SDK version needs updating, a model is too old for the current SDK /// version, or that the model is just @@ -78,11 +78,10 @@ public struct LiveSessionUnexpectedClosureError: Error, Sendable, CustomNSError } } -/// The live model refused our request to setup a live session. +/// The Live API model refused our request to setup a live session. 
/// /// This can occur due to the model not supporting the requested response modalities, the project -/// not having access to the model, -/// the model being invalid, or some internal error. +/// not having access to the model, the model being invalid, or some internal error. /// /// Check the `NSUnderlyingErrorKey` entry in ``errorUserInfo`` for the error that caused this. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) diff --git a/FirebaseAI/Sources/Types/Public/ResponseModality.swift b/FirebaseAI/Sources/Types/Public/ResponseModality.swift index bd3e56d8e8b..17886c6b8f8 100644 --- a/FirebaseAI/Sources/Types/Public/ResponseModality.swift +++ b/FirebaseAI/Sources/Types/Public/ResponseModality.swift @@ -57,7 +57,7 @@ public struct ResponseModality: EncodableProtoEnum, Sendable { /// > Warning: This is currently **only** supported via the /// > [live api](https://firebase.google.com/docs/ai-logic/live-api)\. /// > - /// > Furthermore, bidirectional streaming using Live models is in Public Preview, which means + /// > Furthermore, bidirectional streaming using Live API models is in Public Preview, which means /// > that the feature is not subject to any SLA or deprecation policy and could change in /// > backwards-incompatible ways. 
public static let audio = ResponseModality(kind: .audio) From 89d95de9316ae5162c9cc5d5aa6afe7d6f5236d7 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Tue, 30 Sep 2025 22:24:04 -0400 Subject: [PATCH 96/98] [Firebase AI] Replace `LiveServerMessage` struct+enum with protocol --- .../BidiGenerateContentServerMessage.swift | 8 +-- .../Internal/Live/LiveSessionService.swift | 43 ++++++++----- .../Types/Public/Live/LiveServerContent.swift | 8 ++- .../Live/LiveServerGoingAwayNotice.swift | 7 ++- .../Types/Public/Live/LiveServerMessage.swift | 60 +------------------ .../Public/Live/LiveServerToolCall.swift | 8 ++- .../Live/LiveServerToolCallCancellation.swift | 8 ++- 7 files changed, 56 insertions(+), 86 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index ab7de34fba7..e522e1a9701 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -40,7 +40,7 @@ struct BidiGenerateContentServerMessage: Sendable { } /// The message type. - let messageType: MessageType + let messageType: MessageType? /// Usage metadata about the response(s). let usageMetadata: GenerateContentResponse.UsageMetadata? @@ -86,11 +86,7 @@ extension BidiGenerateContentServerMessage: Decodable { } else if let goAway = try container.decodeIfPresent(GoAway.self, forKey: .goAway) { messageType = .goAway(goAway) } else { - let context = DecodingError.Context( - codingPath: decoder.codingPath, - debugDescription: "Could not decode server message." 
- ) - throw DecodingError.dataCorrupted(context) + messageType = nil } usageMetadata = try container.decodeIfPresent( diff --git a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift index c27bf213acb..cd2d793113e 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/LiveSessionService.swift @@ -206,10 +206,14 @@ actor LiveSessionService { from: message ) } catch { - throw LiveSessionUnsupportedMessageError(underlyingError: error) + setupComplete.resume( + throwing: LiveSessionUnsupportedMessageError(underlyingError: error) + ) + return } - if case .setupComplete = response.messageType { + switch response.messageType { + case .setupComplete: if resumed { AILog.debug( code: .duplicateLiveSessionSetupComplete, @@ -221,18 +225,29 @@ actor LiveSessionService { resumed = true setupComplete.resume() } - } else if let liveMessage = LiveServerMessage(from: response) { - if case let .goingAwayNotice(message) = liveMessage.messageType { - // TODO: (b/444045023) When auto session resumption is enabled, call `connect` again - AILog.debug( - code: .liveSessionGoingAwaySoon, - "Session expires in: \(message.goAway.timeLeft?.timeInterval ?? 0)" - ) - } - - responseContinuation.yield(liveMessage) - } else { - // we don't raise an error, since this allows us to add support internally but not + case let .goAway(goAway): + // TODO: (b/444045023) When auto session resumption is enabled, call `connect` again + AILog.debug( + code: .liveSessionGoingAwaySoon, + "Session expires in: \(goAway.timeLeft?.timeInterval ?? 
0)" + ) + responseContinuation.yield( + LiveServerGoingAwayNotice(goAway, usageMetadata: response.usageMetadata) + ) + case let .serverContent(serverContent): + responseContinuation.yield( + LiveServerContent(serverContent, usageMetadata: response.usageMetadata) + ) + case let .toolCall(toolCall): + responseContinuation.yield( + LiveServerToolCall(toolCall, usageMetadata: response.usageMetadata) + ) + case let .toolCallCancellation(toolCallCancellation): + responseContinuation.yield(LiveServerToolCallCancellation( + toolCallCancellation, usageMetadata: response.usageMetadata + )) + case .none: + // We don't raise an error, since this allows us to add support internally but not // publicly. We still log it in debug though, in case it's not expected. AILog.debug( code: .liveSessionUnsupportedMessage, diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift index bd4d35ff8df..df994a8f1ff 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerContent.swift @@ -19,7 +19,7 @@ /// may choose to buffer and play it out in realtime. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) @available(watchOS, unavailable) -public struct LiveServerContent: Sendable { +public struct LiveServerContent: LiveServerMessage { let serverContent: BidiGenerateContentServerContent /// The content that the model has generated as part of the current @@ -77,7 +77,11 @@ public struct LiveServerContent: Sendable { serverContent.outputTranscription.map { LiveTranscription($0) } } - init(_ serverContent: BidiGenerateContentServerContent) { + public let usageMetadata: GenerateContentResponse.UsageMetadata? + + init(_ serverContent: BidiGenerateContentServerContent, + usageMetadata: GenerateContentResponse.UsageMetadata?) 
{ self.serverContent = serverContent + self.usageMetadata = usageMetadata } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoingAwayNotice.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoingAwayNotice.swift index 981ddf0c251..4aa5b97379b 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerGoingAwayNotice.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerGoingAwayNotice.swift @@ -19,7 +19,7 @@ import Foundation /// To learn more about session limits, see the docs on [Maximum session duration](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/multimodal-live#maximum-session-duration)\. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) @available(watchOS, unavailable) -public struct LiveServerGoingAwayNotice: Sendable { +public struct LiveServerGoingAwayNotice: LiveServerMessage { let goAway: GoAway /// The remaining time before the connection will be terminated as ABORTED. /// @@ -27,7 +27,10 @@ public struct LiveServerGoingAwayNotice: Sendable { /// the rate limits for a given model. public var timeLeft: TimeInterval? { goAway.timeLeft?.timeInterval } - init(_ goAway: GoAway) { + public let usageMetadata: GenerateContentResponse.UsageMetadata? + + init(_ goAway: GoAway, usageMetadata: GenerateContentResponse.UsageMetadata?) { self.goAway = goAway + self.usageMetadata = usageMetadata } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift index e84380a66d4..27545e14cc1 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerMessage.swift @@ -15,63 +15,7 @@ /// Update from the server, generated from the model in response to client messages. 
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) @available(watchOS, unavailable) -public struct LiveServerMessage: Sendable { - let serverMessage: BidiGenerateContentServerMessage - - /// The type of message sent from the server. - public enum MessageType: Sendable { - /// Content generated by the model in response to client messages. - case content(LiveServerContent) - - /// Request for the client to execute the provided functions. - case toolCall(LiveServerToolCall) - - /// Notification for the client that a previously issued ``LiveServerToolCall`` should be - /// cancelled. - case toolCallCancellation(LiveServerToolCallCancellation) - - /// Server will disconnect soon. - case goingAwayNotice(LiveServerGoingAwayNotice) - } - - /// The actual message sent from the server. - public var messageType: MessageType - +public protocol LiveServerMessage: Sendable { /// Metadata on the usage of the cached content. - public var usageMetadata: GenerateContentResponse.UsageMetadata? 
{ serverMessage.usageMetadata } -} - -// MARK: - Internal parsing - -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) -@available(watchOS, unavailable) -extension LiveServerMessage { - init?(from serverMessage: BidiGenerateContentServerMessage) { - guard let messageType = LiveServerMessage.MessageType(from: serverMessage.messageType) else { - return nil - } - - self.serverMessage = serverMessage - self.messageType = messageType - } -} - -@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) -@available(watchOS, unavailable) -extension LiveServerMessage.MessageType { - init?(from serverMessage: BidiGenerateContentServerMessage.MessageType) { - switch serverMessage { - case .setupComplete: - // this is handled internally, and should not be surfaced to users - return nil - case let .serverContent(msg): - self = .content(LiveServerContent(msg)) - case let .toolCall(msg): - self = .toolCall(LiveServerToolCall(msg)) - case let .toolCallCancellation(msg): - self = .toolCallCancellation(LiveServerToolCallCancellation(msg)) - case let .goAway(msg): - self = .goingAwayNotice(LiveServerGoingAwayNotice(msg)) - } - } + var usageMetadata: GenerateContentResponse.UsageMetadata? { get } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift index 7209e312c76..45ef3b789a2 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCall.swift @@ -18,7 +18,7 @@ /// correspond to individual ``FunctionCallPart``s. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) @available(watchOS, unavailable) -public struct LiveServerToolCall: Sendable { +public struct LiveServerToolCall: LiveServerMessage { let serverToolCall: BidiGenerateContentToolCall /// A list of ``FunctionCallPart`` to run and return responses for. 
@@ -26,7 +26,11 @@ public struct LiveServerToolCall: Sendable { serverToolCall.functionCalls?.map { FunctionCallPart($0) } } - init(_ serverToolCall: BidiGenerateContentToolCall) { + public let usageMetadata: GenerateContentResponse.UsageMetadata? + + init(_ serverToolCall: BidiGenerateContentToolCall, + usageMetadata: GenerateContentResponse.UsageMetadata?) { self.serverToolCall = serverToolCall + self.usageMetadata = usageMetadata } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift index ca7973c64b7..d3566139ff4 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveServerToolCallCancellation.swift @@ -18,13 +18,17 @@ /// ``FunctionCallPart``s. @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, *) @available(watchOS, unavailable) -public struct LiveServerToolCallCancellation: Sendable { +public struct LiveServerToolCallCancellation: LiveServerMessage { let serverToolCallCancellation: BidiGenerateContentToolCallCancellation /// A list of `functionId`s matching the `functionId` provided in a previous /// ``LiveServerToolCall``, where only the provided `functionId`s should be cancelled. public var ids: [String]? { serverToolCallCancellation.ids } - init(_ serverToolCallCancellation: BidiGenerateContentToolCallCancellation) { + public let usageMetadata: GenerateContentResponse.UsageMetadata? + + init(_ serverToolCallCancellation: BidiGenerateContentToolCallCancellation, + usageMetadata: GenerateContentResponse.UsageMetadata?) 
{ self.serverToolCallCancellation = serverToolCallCancellation + self.usageMetadata = usageMetadata } } From 728ead5a490bfa33aaee42f7dd4c5a9f94ea829b Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Tue, 30 Sep 2025 22:50:23 -0400 Subject: [PATCH 97/98] Add an API test to demonstrate usage --- FirebaseAI/Tests/Unit/APITests.swift | 42 ++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/FirebaseAI/Tests/Unit/APITests.swift b/FirebaseAI/Tests/Unit/APITests.swift index 16c963b1f0c..676d89264a4 100644 --- a/FirebaseAI/Tests/Unit/APITests.swift +++ b/FirebaseAI/Tests/Unit/APITests.swift @@ -183,4 +183,46 @@ final class APITests: XCTestCase { let _: String? = response.text let _: [FunctionCallPart] = response.functionCalls } + + func liveSessionAPI() async throws { + let firebaseAI = FirebaseAI.firebaseAI() + + // Initialize a Live Model + let liveModel = firebaseAI.liveModel(modelName: "gemini-2.0-flash-live-001") + + // Start a Live session + let session = try await liveModel.connect() + + // Add history incrementally to the session + await session.sendContent("Where is Google headquarters located?") + await session.sendContent("Respond in the format 'City, State'", turnComplete: true) + await session.sendContent( + [ModelContent(role: "model", parts: [TextPart("Mountain View, California")])], + turnComplete: true + ) + + // Send realtime data + await session.sendTextRealtime("What year was it founded?") + await session.sendAudioRealtime(Data()) + await session.sendVideoRealtime(Data(), format: "mp4") + + // Handle response content + for try await response in session.responses { + switch response { + case let serverContent as LiveServerContent: + print("Server content: \(serverContent)") + case let toolCall as LiveServerToolCall: + print("Tool call: \(toolCall)") + case let toolCallCancellation as LiveServerToolCallCancellation: + print("Tool call cancellation: \(toolCallCancellation)") + case let goingAway as LiveServerGoingAwayNotice: + 
print("Session is going away: \(goingAway)") + default: + print("Unexpected response type: \(response)") + } + } + + // Close a Live session + await session.close() + } } From e692f64c71068ad269b93a6c21a85b06c61a20cc Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Tue, 30 Sep 2025 23:06:49 -0400 Subject: [PATCH 98/98] Mark `APITests.liveSessionAPI` as unavailable on watchOS --- FirebaseAI/Tests/Unit/APITests.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/FirebaseAI/Tests/Unit/APITests.swift b/FirebaseAI/Tests/Unit/APITests.swift index 676d89264a4..02923fc4f81 100644 --- a/FirebaseAI/Tests/Unit/APITests.swift +++ b/FirebaseAI/Tests/Unit/APITests.swift @@ -184,6 +184,7 @@ final class APITests: XCTestCase { let _: [FunctionCallPart] = response.functionCalls } + @available(watchOS, unavailable) func liveSessionAPI() async throws { let firebaseAI = FirebaseAI.firebaseAI()