From 3ff6776470bb24c44cfee668f1ced9f0ff1f0d01 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sat, 9 Aug 2025 12:47:55 -0400 Subject: [PATCH 01/13] [Firebase AI] Add starter types for Live API --- .../Internal/Live/ActivityHandling.swift | 35 +++++++++++ .../BidiGenerateContentClientContent.swift | 34 ++++++++++ .../BidiGenerateContentClientMessage.swift | 30 +++++++++ .../BidiGenerateContentRealtimeInput.swift | 63 +++++++++++++++++++ .../BidiGenerateContentServerContent.swift | 52 +++++++++++++++ .../BidiGenerateContentServerMessage.swift | 45 +++++++++++++ .../Live/BidiGenerateContentSetup.swift | 57 +++++++++++++++++ .../BidiGenerateContentSetupComplete.swift | 18 ++++++ .../Live/BidiGenerateContentToolCall.swift | 22 +++++++ ...iGenerateContentToolCallCancellation.swift | 25 ++++++++ .../BidiGenerateContentToolResponse.swift | 28 +++++++++ .../Types/Internal/Live/EndSensitivity.swift | 33 ++++++++++ .../Sources/Types/Internal/Live/GoAway.swift | 23 +++++++ .../Internal/Live/RealtimeInputConfig.swift | 54 ++++++++++++++++ .../Internal/Live/StartSensitivity.swift | 33 ++++++++++ .../Types/Internal/Live/TurnCoverage.swift | 36 +++++++++++ 16 files changed, 588 insertions(+) create mode 100644 FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/GoAway.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift create mode 100644 FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift diff --git a/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift b/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift new file mode 100644 index 00000000000..26867a98925 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/ActivityHandling.swift @@ -0,0 +1,35 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
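+
+// `ActivityHandling` mirrors the proto enum of the same name and is encoded as its raw
+// string value. Illustrative sketch (assumes the internal memberwise initializer of
+// `RealtimeInputConfig`, which is defined later in this patch):
+//
+//   let config = RealtimeInputConfig(
+//     automaticActivityDetection: nil,
+//     activityHandling: .interrupts, // serializes as "START_OF_ACTIVITY_INTERRUPTS"
+//     turnCoverage: nil
+//   )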
+ +import Foundation + +/// The different ways of handling user activity. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct ActivityHandling: EncodableProtoEnum, Hashable, Sendable { + enum Kind: String { + case interrupts = "START_OF_ACTIVITY_INTERRUPTS" + case noInterrupt = "NO_INTERRUPTION" + } + + /// If true, start of activity will interrupt the model's response (also + /// called "barge in"). The model's current response will be cut-off in the + /// moment of the interruption. This is the default behavior. + public static let interrupts = ActivityHandling(kind: .interrupts) + + /// The model's response will not be interrupted. + public static let noInterrupt = ActivityHandling(kind: .noInterrupt) + + /// Returns the raw string representation of the `ActivityHandling` value. + public let rawValue: String +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift new file mode 100644 index 00000000000..91fed495ac5 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift @@ -0,0 +1,34 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Incremental update of the current conversation delivered from the client. +/// All the content here is unconditionally appended to the conversation +/// history and used as part of the prompt to the model to generate content. +/// +/// A message here will interrupt any current model generation. +struct BidiGenerateContentClientContent: Encodable { + /// The content appended to the current conversation with the model. + /// + /// For single-turn queries, this is a single instance. For multi-turn + /// queries, this is a repeated field that contains conversation history and + /// latest request. + let turns: [ModelContent]? + + /// If true, indicates that the server content generation should start with + /// the currently accumulated prompt. Otherwise, the server will await + /// additional messages before starting generation. + let turnComplete: Bool? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift new file mode 100644 index 00000000000..88e9ac96896 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -0,0 +1,30 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Messages sent by the client in the BidiGenerateContent RPC call. +enum BidiGenerateContentClientMessage: Encodable { + /// Message to be sent in the first and only first client message. + case setup(BidiGenerateContentSetup) + + /// Incremental update of the current conversation delivered from the client. + case clientContent(BidiGenerateContentClientContent) + + /// User input that is sent in real time. + case realtimeInput(BidiGenerateContentRealtimeInput) + + /// Response to a `ToolCallMessage` received from the server. + case toolResponse(BidiGenerateContentToolResponse) +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift new file mode 100644 index 00000000000..26a9f84d8d7 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift @@ -0,0 +1,63 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// User input that is sent in real time. +/// +/// This is different from `ClientContentUpdate` in a few ways: +/// +/// - Can be sent continuously without interruption to model generation. +/// - If there is a need to mix data interleaved across the +/// `ClientContentUpdate` and the `RealtimeUpdate`, server attempts to +/// optimize for best response, but there are no guarantees. +/// - End of turn is not explicitly specified, but is rather derived from user +/// activity (for example, end of speech). +/// - Even before the end of turn, the data is processed incrementally +/// to optimize for a fast start of the response from the model. +/// - Is always assumed to be the user's input (cannot be used to populate +/// conversation history). +struct BidiGenerateContentRealtimeInput: Encodable { + /// These form the realtime audio input stream. + let audio: Data? + + /// Indicates that the audio stream has ended, e.g. because the microphone was + /// turned off. + /// + /// This should only be sent when automatic activity detection is enabled + /// (which is the default). + /// + /// The client can reopen the stream by sending an audio message. + let audioStreamEnd: Bool? + + /// These form the realtime video input stream. + let video: Data? + + /// These form the realtime text input stream. + let text: String? + + /// Marks the start of user activity. + struct ActivityStart: Encodable {} + + /// Marks the start of user activity. This can only be sent if automatic + /// (i.e. server-side) activity detection is disabled. + let activityStart: ActivityStart? + + /// Marks the end of user activity. + struct ActivityEnd: Encodable {} + + /// Marks the end of user activity. This can only be sent if automatic (i.e. + // server-side) activity detection is disabled. + let activityEnd: ActivityEnd? 
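+
+  // Note (assumption): with the default `JSONEncoder` configuration, the `Data` fields above
+  // (`audio`, `video`) serialize as base64-encoded strings, matching the proto JSON mapping
+  // for `bytes` fields.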
+} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift new file mode 100644 index 00000000000..f09ec48a303 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift @@ -0,0 +1,52 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Incremental server update generated by the model in response to client +/// messages. +/// +/// Content is generated as quickly as possible, and not in realtime. Clients +/// may choose to buffer and play it out in realtime. +struct BidiGenerateContentServerContent: Decodable { + /// The content that the model has generated as part of the current + /// conversation with the user. + let modelTurn: ModelContent? + + /// If true, indicates that the model is done generating. Generation will only + /// start in response to additional client messages. Can be set alongside + /// `content`, indicating that the `content` is the last in the turn. + let turnComplete: Bool? + + /// If true, indicates that a client message has interrupted current model + /// generation. If the client is playing out the content in realtime, this is a + /// good signal to stop and empty the current queue. If the client is playing + /// out the content in realtime, this is a good signal to stop and empty the + /// current playback queue. + let interrupted: Bool? + + /// If true, indicates that the model is done generating. + /// + /// When model is interrupted while generating there will be no + /// 'generation_complete' message in interrupted turn, it will go through + /// 'interrupted > turn_complete'. + /// + /// When model assumes realtime playback there will be delay between + /// generation_complete and turn_complete that is caused by model waiting for + /// playback to finish. + let generationComplete: Bool? + + /// Metadata specifies sources used to ground generated content. + let groundingMetadata: GroundingMetadata? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift new file mode 100644 index 00000000000..627fa12d771 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -0,0 +1,45 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
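+
+// Wire-format sketch (assumed, inferred from the decoding logic added later in this patch
+// series): each server frame is a JSON object carrying exactly one message-type key, plus
+// optional usage metadata, for example:
+//
+//   {"serverContent": {"modelTurn": {...}, "turnComplete": true}, "usageMetadata": {...}}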
+ +import Foundation + +/// Response message for BidiGenerateContent RPC call. +struct BidiGenerateContentServerMessage: Decodable { + /// The type of the message. + enum MessageType: Decodable { + /// Sent in response to a `BidiGenerateContentSetup` message from the client. + case setupComplete(BidiGenerateContentSetupComplete) + + /// Content generated by the model in response to client messages. + case serverContent(BidiGenerateContentServerContent) + + /// Request for the client to execute the `function_calls` and return the + /// responses with the matching `id`s. + case toolCall(BidiGenerateContentToolCall) + + /// Notification for the client that a previously issued + /// `ToolCallMessage` with the specified `id`s should have been not executed + /// and should be cancelled. + case toolCallCancellation(BidiGenerateContentToolCallCancellation) + + /// Server will disconnect soon. + case goAway(GoAway) + } + + /// The message type. + let messageType: MessageType + + /// Usage metadata about the response(s). + let usageMetadata: GenerateContentResponse.UsageMetadata? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift new file mode 100644 index 00000000000..ec199a59b41 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -0,0 +1,57 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Message to be sent in the first and only first +/// `BidiGenerateContentClientMessage`. Contains configuration that will apply +/// for the duration of the streaming RPC. +/// +/// Clients should wait for a `BidiGenerateContentSetupComplete` message before +/// sending any additional messages. +struct BidiGenerateContentSetup: Encodable { + /// The fully qualified name of the publisher model. + /// + /// Publisher model format: + /// `projects/{project}/locations/{location}/publishers/*/models/*` + let model: String + + /// Generation config. + /// + /// The following fields aren't supported: + /// + /// - `response_logprobs` + /// - `response_mime_type` + /// - `logprobs` + /// - `response_schema` + /// - `stop_sequence` + /// - `routing_config` + /// - `audio_timestamp` + let generationConfig: GenerationConfig? + + /// The user provided system instructions for the model. + /// Note: only text should be used in parts and content in each part will be + /// in a separate paragraph. + let systemInstruction: ModelContent? + + /// A list of `Tools` the model may use to generate the next response. + /// + /// A `Tool` is a piece of code that enables the system to interact with + /// external systems to perform an action, or set of actions, outside of + /// knowledge and scope of the model. + let tools: [Tool]? + + /// Configures the handling of realtime input. + let realtimeInputConfig: RealtimeInputConfig? 
+} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift new file mode 100644 index 00000000000..a2b02c0caf2 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift @@ -0,0 +1,18 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Sent in response to a `BidiGenerateContentSetup` message from the client. +struct BidiGenerateContentSetupComplete: Decodable {} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift new file mode 100644 index 00000000000..e53decadfab --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift @@ -0,0 +1,22 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Request for the client to execute the `function_calls` and return the +/// responses with the matching `id`s. +struct BidiGenerateContentToolCall: Decodable { + /// The function call to be executed. + let functionCalls: [FunctionCall]? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift new file mode 100644 index 00000000000..fb25fd9f330 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift @@ -0,0 +1,25 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Notification for the client that a previously issued `ToolCallMessage` +/// with the specified `id`s should have been not executed and should be +/// cancelled. If there were side-effects to those tool calls, clients may +/// attempt to undo the tool calls. 
This message occurs only in cases where the +/// clients interrupt server turns. +struct BidiGenerateContentToolCallCancellation: Decodable { + /// The ids of the tool calls to be cancelled. + let ids: [String]? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift new file mode 100644 index 00000000000..245f2668a0e --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift @@ -0,0 +1,28 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Client generated response to a `ToolCall` received from the server. +/// Individual `FunctionResponse` objects are matched to the respective +/// `FunctionCall` objects by the `id` field. +/// +/// Note that in the unary and server-streaming GenerateContent APIs function +/// calling happens by exchanging the `Content` parts, while in the bidi +/// GenerateContent APIs function calling happens over these dedicated set of +/// messages. +struct BidiGenerateContentToolResponse: Encodable { + /// The response to the function calls. + let functionResponses: [FunctionResponse]? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift b/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift new file mode 100644 index 00000000000..6caa5d85440 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/EndSensitivity.swift @@ -0,0 +1,33 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// End of speech sensitivity. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct EndSensitivity: EncodableProtoEnum, Hashable, Sendable { + enum Kind: String { + case high = "END_SENSITIVITY_HIGH" + case low = "END_SENSITIVITY_LOW" + } + + /// Automatic detection will end speech more often. + public static let high = EndSensitivity(kind: .high) + + /// Automatic detection will end speech less often. + public static let low = EndSensitivity(kind: .low) + + /// Returns the raw string representation of the `EndSensitivity` value. 
+ public let rawValue: String +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift new file mode 100644 index 00000000000..729d86c6cfd --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift @@ -0,0 +1,23 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Server will not be able to service client soon. +struct GoAway: Decodable { + /// The remaining time before the connection will be terminated as ABORTED. + /// The minimal time returned here is specified differently together with + /// the rate limits for a given model. + let timeLeft: TimeInterval? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift new file mode 100644 index 00000000000..8ebade9b98b --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift @@ -0,0 +1,54 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Configures the realtime input behavior in `BidiGenerateContent`. +struct RealtimeInputConfig: Encodable { + /// Configures automatic detection of activity. + struct AutomaticActivityDetection: Encodable { + /// If enabled, detected voice and text input count as activity. If + /// disabled, the client must send activity signals. + let disabled: Bool? + + /// Determines how likely speech is to be detected. + let startOfSpeechSensitivity: StartSensitivity? + + /// Determines how likely detected speech is ended. + let endOfSpeechSensitivity: EndSensitivity? + + /// The required duration of detected speech before start-of-speech is + /// committed. The lower this value the more sensitive the start-of-speech + /// detection is and the shorter speech can be recognized. However, this + /// also increases the probability of false positives. + let prefixPaddingMS: Int? + + /// The required duration of detected silence (or non-speech) before + // end-of-speech is committed. The larger this value, the longer speech + // gaps can be without interrupting the user's activity but this will + // increase the model's latency. + let silenceDurationMS: Int? + } + + /// If not set, automatic activity detection is enabled by default. If + /// automatic voice detection is disabled, the client must send activity + /// signals. + let automaticActivityDetection: AutomaticActivityDetection? 
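+
+  // Illustrative note: when automatic detection is disabled here, the client is expected to
+  // bracket its input with the `activityStart`/`activityEnd` signals defined in
+  // `BidiGenerateContentRealtimeInput`.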
+ + /// Defines what effect activity has. + let activityHandling: ActivityHandling? + + /// Defines which input is included in the user's turn. + let turnCoverage: TurnCoverage? +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift b/FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift new file mode 100644 index 00000000000..ef0e1fda073 --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/StartSensitivity.swift @@ -0,0 +1,33 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Start of speech sensitivity. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct StartSensitivity: EncodableProtoEnum, Hashable, Sendable { + enum Kind: String { + case high = "START_SENSITIVITY_HIGH" + case low = "START_SENSITIVITY_LOW" + } + + /// Automatic detection will detect the start of speech more often. + public static let high = StartSensitivity(kind: .high) + + /// Automatic detection will detect the start of speech less often. + public static let low = StartSensitivity(kind: .low) + + /// Returns the raw string representation of the `StartSensitivity` value. + public let rawValue: String +} diff --git a/FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift b/FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift new file mode 100644 index 00000000000..5d69fee78ce --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/TurnCoverage.swift @@ -0,0 +1,36 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// Options about which input is included in the user's turn. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct TurnCoverage: EncodableProtoEnum, Hashable, Sendable { + enum Kind: String { + case onlyActivity = "TURN_INCLUDES_ONLY_ACTIVITY" + case allInput = "TURN_INCLUDES_ALL_INPUT" + } + + /// The users turn only includes activity since the last turn, excluding + /// inactivity (e.g. silence on the audio stream). + public static let onlyActivity = TurnCoverage(kind: .onlyActivity) + + /// The users turn includes all realtime input since the last turn, including + /// inactivity (e.g. silence on the audio stream). This is the default + // behavior. + public static let allInput = TurnCoverage(kind: .allInput) + + /// Returns the raw string representation of the `TurnCoverage` value. 
+ public let rawValue: String +} From e7d879076aa70077fd3de790e1bc8d4e52881d64 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sat, 9 Aug 2025 13:09:40 -0400 Subject: [PATCH 02/13] Add placeholder types for `LiveGenerativeModel` and `LiveSession` --- .../Public/Live/LiveGenerativeModel.swift | 41 +++++++++++++++++++ .../Types/Public/Live/LiveSession.swift | 15 +++++++ 2 files changed, 56 insertions(+) create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveSession.swift diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift new file mode 100644 index 00000000000..b93e6bfd4ae --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift @@ -0,0 +1,41 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public final class LiveGenerativeModel { + let modelResourceName: String + let apiConfig: APIConfig + let requestOptions: RequestOptions + + init(modelResourceName: String, + firebaseInfo: FirebaseInfo, + apiConfig: APIConfig, + requestOptions: RequestOptions, + urlSession: URLSession = GenAIURLSession.default) { + self.modelResourceName = modelResourceName + self.apiConfig = apiConfig + // TODO: Add LiveGenerationConfig + // TODO: Add tools + // TODO: Add tool config + // TODO: Add system instruction + self.requestOptions = requestOptions + } + + public func connect() async throws -> LiveSession { + // TODO: Implement connection + return LiveSession() + } +} diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift new file mode 100644 index 00000000000..f2c88d35492 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -0,0 +1,15 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
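+
+// Intended usage (illustrative sketch; `connect()`, `sendMessage(_:)`, and `responses` are
+// filled in by later commits in this series, and the model name below is a placeholder):
+//
+//   // `ai` is an existing `FirebaseAI` instance.
+//   let session = try await ai.liveModel(modelName: "<live-model-name>").connect()
+//   try await session.sendMessage("Hello")
+//   for try await message in session.responses { /* handle each server message */ }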
+ +public final class LiveSession {} From 58bf6029a63db235157c4f05e4e3116d5013e58c Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 12:48:30 -0400 Subject: [PATCH 03/13] Fix `BidiGenerateContentClientMessage` encoding --- .../BidiGenerateContentClientMessage.swift | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift index 88e9ac96896..147e986e863 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -15,7 +15,7 @@ import Foundation /// Messages sent by the client in the BidiGenerateContent RPC call. -enum BidiGenerateContentClientMessage: Encodable { +enum BidiGenerateContentClientMessage { /// Message to be sent in the first and only first client message. case setup(BidiGenerateContentSetup) @@ -28,3 +28,26 @@ enum BidiGenerateContentClientMessage: Encodable { /// Response to a `ToolCallMessage` received from the server. case toolResponse(BidiGenerateContentToolResponse) } + +extension BidiGenerateContentClientMessage: Encodable { + enum CodingKeys: CodingKey { + case setup + case clientContent + case realtimeInput + case toolResponse + } + + func encode(to encoder: any Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + switch self { + case let .setup(setup): + try container.encode(setup, forKey: .setup) + case let .clientContent(clientContent): + try container.encode(clientContent, forKey: .clientContent) + case let .realtimeInput(realtimeInput): + try container.encode(realtimeInput, forKey: .realtimeInput) + case let .toolResponse(toolResponse): + try container.encode(toolResponse, forKey: .toolResponse) + } + } +} From 537f8f888c39ebd9949758a9e2edbc96683d1f16 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 12:49:03 -0400 Subject: [PATCH 04/13] Fix `BidiGenerateContentServerMessage` decoding --- .../BidiGenerateContentServerMessage.swift | 56 ++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 627fa12d771..cb3b5e0e4e3 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,9 +15,9 @@ import Foundation /// Response message for BidiGenerateContent RPC call. -struct BidiGenerateContentServerMessage: Decodable { +struct BidiGenerateContentServerMessage { /// The type of the message. - enum MessageType: Decodable { + enum MessageType { /// Sent in response to a `BidiGenerateContentSetup` message from the client. case setupComplete(BidiGenerateContentSetupComplete) @@ -43,3 +43,55 @@ struct BidiGenerateContentServerMessage: Decodable { /// Usage metadata about the response(s). let usageMetadata: GenerateContentResponse.UsageMetadata? 
} + +// MARK: - Decodable + +extension BidiGenerateContentServerMessage: Decodable { + enum CodingKeys: String, CodingKey { + case setupComplete + case serverContent + case toolCall + case toolCallCancellation + case goAway + case usageMetadata + } + + init(from decoder: any Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + + if let setupComplete = try container.decodeIfPresent( + BidiGenerateContentSetupComplete.self, + forKey: .setupComplete + ) { + messageType = .setupComplete(setupComplete) + } else if let serverContent = try container.decodeIfPresent( + BidiGenerateContentServerContent.self, + forKey: .serverContent + ) { + messageType = .serverContent(serverContent) + } else if let toolCall = try container.decodeIfPresent( + BidiGenerateContentToolCall.self, + forKey: .toolCall + ) { + messageType = .toolCall(toolCall) + } else if let toolCallCancellation = try container.decodeIfPresent( + BidiGenerateContentToolCallCancellation.self, + forKey: .toolCallCancellation + ) { + messageType = .toolCallCancellation(toolCallCancellation) + } else if let goAway = try container.decodeIfPresent(GoAway.self, forKey: .goAway) { + messageType = .goAway(goAway) + } else { + let context = DecodingError.Context( + codingPath: decoder.codingPath, + debugDescription: "Could not decode server message." + ) + throw DecodingError.dataCorrupted(context) + } + + usageMetadata = try container.decodeIfPresent( + GenerateContentResponse.UsageMetadata.self, + forKey: .usageMetadata + ) + } +} From 781169626e1b1b6edde7410ac0659b633dbf25aa Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:17:21 -0400 Subject: [PATCH 05/13] Add `LiveGenerationConfig` and add to setup --- .../Live/BidiGenerateContentSetup.swift | 24 +-- .../Public/Live/LiveGenerationConfig.swift | 155 ++++++++++++++++++ 2 files changed, 168 insertions(+), 11 deletions(-) create mode 100644 FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index ec199a59b41..2744950d68f 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -28,17 +28,7 @@ struct BidiGenerateContentSetup: Encodable { let model: String /// Generation config. - /// - /// The following fields aren't supported: - /// - /// - `response_logprobs` - /// - `response_mime_type` - /// - `logprobs` - /// - `response_schema` - /// - `stop_sequence` - /// - `routing_config` - /// - `audio_timestamp` - let generationConfig: GenerationConfig? + let generationConfig: LiveGenerationConfig? /// The user provided system instructions for the model. /// Note: only text should be used in parts and content in each part will be @@ -54,4 +44,16 @@ struct BidiGenerateContentSetup: Encodable { /// Configures the handling of realtime input. let realtimeInputConfig: RealtimeInputConfig? + + init(model: String, + generationConfig: LiveGenerationConfig? = nil, + systemInstruction: ModelContent? = nil, + tools: [Tool]? = nil, + realtimeInputConfig: RealtimeInputConfig? 
= nil) { + self.model = model + self.generationConfig = generationConfig + self.systemInstruction = systemInstruction + self.tools = tools + self.realtimeInputConfig = realtimeInputConfig + } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift new file mode 100644 index 00000000000..ae961d14fb0 --- /dev/null +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerationConfig.swift @@ -0,0 +1,155 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Foundation + +/// A struct defining model parameters to be used when sending generative AI +/// requests to the backend model. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +public struct LiveGenerationConfig: Sendable { + /// Controls the degree of randomness in token selection. + let temperature: Float? + + /// Controls diversity of generated text. + let topP: Float? + + /// Limits the number of highest probability words considered. + let topK: Int? + + /// The number of response variations to return. + let candidateCount: Int? + + /// Maximum number of tokens that can be generated in the response. + let maxOutputTokens: Int? + + /// Controls the likelihood of repeating the same words or phrases already generated in the text. + let presencePenalty: Float? + + /// Controls the likelihood of repeating words, with the penalty increasing for each repetition. + let frequencyPenalty: Float? + + /// Supported modalities of the response. + let responseModalities: [ResponseModality]? + + /// Creates a new `GenerationConfig` value. + /// + /// See the + /// [Configure model parameters](https://firebase.google.com/docs/vertex-ai/model-parameters) + /// guide and the + /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig) + /// for more details. + /// + /// - Parameters: + /// - temperature:Controls the randomness of the language model's output. Higher values (for + /// example, 1.0) make the text more random and creative, while lower values (for example, + /// 0.1) make it more focused and deterministic. + /// + /// > Note: A temperature of 0 means that the highest probability tokens are always selected. + /// > In this case, responses for a given prompt are mostly deterministic, but a small amount + /// > of variation is still possible. + /// + /// > Important: The range of supported temperature values depends on the model; see the + /// > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#temperature) + /// > for more details. + /// - topP: Controls diversity of generated text. Higher values (e.g., 0.9) produce more diverse + /// text, while lower values (e.g., 0.5) make the output more focused. + /// + /// The supported range is 0.0 to 1.0. 
+ /// + /// > Important: The default `topP` value depends on the model; see the + /// > [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-p) + /// > for more details. + /// - topK: Limits the number of highest probability words the model considers when generating + /// text. For example, a topK of 40 means only the 40 most likely words are considered for the + /// next token. A higher value increases diversity, while a lower value makes the output more + /// deterministic. + /// + /// The supported range is 1 to 40. + /// + /// > Important: Support for `topK` and the default value depends on the model; see the + /// [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#top-k) + /// for more details. + /// - candidateCount: The number of response variations to return; defaults to 1 if not set. + /// Support for multiple candidates depends on the model; see the + /// [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig) + /// for more details. + /// - maxOutputTokens: Maximum number of tokens that can be generated in the response. + /// See the configure model parameters [documentation](https://firebase.google.com/docs/vertex-ai/model-parameters?platform=ios#max-output-tokens) + /// for more details. + /// - presencePenalty: Controls the likelihood of repeating the same words or phrases already + /// generated in the text. Higher values increase the penalty of repetition, resulting in more + /// diverse output. + /// + /// > Note: While both `presencePenalty` and `frequencyPenalty` discourage repetition, + /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase + /// > has already appeared, whereas `frequencyPenalty` increases the penalty for *each* + /// > repetition of a word/phrase. + /// + /// > Important: The range of supported `presencePenalty` values depends on the model; see the + /// > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig) + /// > for more details + /// - frequencyPenalty: Controls the likelihood of repeating words or phrases, with the penalty + /// increasing for each repetition. Higher values increase the penalty of repetition, + /// resulting in more diverse output. + /// + /// > Note: While both `frequencyPenalty` and `presencePenalty` discourage repetition, + /// > `frequencyPenalty` increases the penalty for *each* repetition of a word/phrase, whereas + /// > `presencePenalty` applies the same penalty regardless of how many times the word/phrase + /// > has already appeared. + /// + /// > Important: The range of supported `frequencyPenalty` values depends on the model; see + /// > the + /// > [Cloud documentation](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference#generationconfig) + /// > for more details + /// - responseModalities: The data types (modalities) that may be returned in model responses. + /// + /// See the [multimodal + /// responses](https://cloud.google.com/vertex-ai/generative-ai/docs/multimodal-response-generation) + /// documentation for more details. + /// + /// > Warning: Specifying response modalities is a **Public Preview** feature, which means + /// > that it is not subject to any SLA or deprecation policy and could change in + /// > backwards-incompatible ways. + public init(temperature: Float? = nil, topP: Float? = nil, topK: Int? = nil, + candidateCount: Int? 
= nil, maxOutputTokens: Int? = nil, + presencePenalty: Float? = nil, frequencyPenalty: Float? = nil, + responseModalities: [ResponseModality]? = nil) { + // Explicit init because otherwise if we re-arrange the above variables it changes the API + // surface. + self.temperature = temperature + self.topP = topP + self.topK = topK + self.candidateCount = candidateCount + self.maxOutputTokens = maxOutputTokens + self.presencePenalty = presencePenalty + self.frequencyPenalty = frequencyPenalty + self.responseModalities = responseModalities + } +} + +// MARK: - Codable Conformances + +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) +extension LiveGenerationConfig: Encodable { + enum CodingKeys: String, CodingKey { + case temperature + case topP + case topK + case candidateCount + case maxOutputTokens + case presencePenalty + case frequencyPenalty + case responseModalities + } +} From 4c930b444a15fb06f51d06136c4dba596b34184b Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:18:46 -0400 Subject: [PATCH 06/13] Add temporary state machine in `LiveSession` --- .../Public/Live/LiveGenerativeModel.swift | 33 ++- .../Types/Public/Live/LiveSession.swift | 194 +++++++++++++++++- 2 files changed, 223 insertions(+), 4 deletions(-) diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift index b93e6bfd4ae..08648fe4e5f 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift @@ -17,25 +17,52 @@ import Foundation @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public final class LiveGenerativeModel { let modelResourceName: String + let firebaseInfo: FirebaseInfo let apiConfig: APIConfig + let generationConfig: LiveGenerationConfig? let requestOptions: RequestOptions + let urlSession: URLSession init(modelResourceName: String, firebaseInfo: FirebaseInfo, apiConfig: APIConfig, + generationConfig: LiveGenerationConfig? 
= nil, requestOptions: RequestOptions, urlSession: URLSession = GenAIURLSession.default) { self.modelResourceName = modelResourceName + self.firebaseInfo = firebaseInfo self.apiConfig = apiConfig - // TODO: Add LiveGenerationConfig + self.generationConfig = generationConfig // TODO: Add tools // TODO: Add tool config // TODO: Add system instruction self.requestOptions = requestOptions + self.urlSession = urlSession } public func connect() async throws -> LiveSession { - // TODO: Implement connection - return LiveSession() + let liveSession = LiveSession( + modelResourceName: modelResourceName, + generationConfig: generationConfig, + url: webSocketURL(), + urlSession: urlSession + ) + print("Opening Live Session...") + try await liveSession.open() + return liveSession + } + + func webSocketURL() -> URL { + let urlString = switch apiConfig.service { + case .vertexAI: + "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.LlmBidiService/BidiGenerateContent/locations/us-central1?key=\(firebaseInfo.apiKey)" + case .googleAI: + "wss://firebasevertexai.googleapis.com/ws/google.firebase.vertexai.v1beta.GenerativeService/BidiGenerateContent?key=\(firebaseInfo.apiKey)" + } + guard let url = URL(string: urlString) else { + // TODO: Add error handling + fatalError() + } + return url } } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index f2c88d35492..fdefd9b5b4e 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -12,4 +12,196 @@ // See the License for the specific language governing permissions and // limitations under the License. -public final class LiveSession {} +import Foundation + +// TODO: Extract most of this file into a service class similar to `GenerativeAIService`. +public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessionTaskDelegate { + private enum State { + case notConnected + case connecting + case setupSent + case ready + case closed + } + + private enum WebSocketError: Error { + case connectionClosed + } + + let modelResourceName: String + let generationConfig: LiveGenerationConfig? 
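+
+  /// The web socket task carrying the bidirectional `BidiGenerateContent` stream for this
+  /// session.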
+ let webSocket: URLSessionWebSocketTask + + private var state: State = .notConnected + private var pendingMessages: [(String, CheckedContinuation)] = [] + private let jsonEncoder = JSONEncoder() + private let jsonDecoder = JSONDecoder() + + init(modelResourceName: String, + generationConfig: LiveGenerationConfig?, + url: URL, + urlSession: URLSession) { + self.modelResourceName = modelResourceName + self.generationConfig = generationConfig + webSocket = urlSession.webSocketTask(with: url) + } + + func open() async throws { + guard state == .notConnected else { + print("Web socket is not in a valid state to be opened: \(state)") + return + } + + state = .connecting + webSocket.delegate = self + webSocket.resume() + + print("Opening websocket") + } + + private func failPendingMessages(with error: Error) { + for (_, continuation) in pendingMessages { + continuation.resume(throwing: error) + } + pendingMessages.removeAll() + } + + private func processPendingMessages() { + for (message, continuation) in pendingMessages { + Task { + do { + try await send(message) + continuation.resume() + } catch { + continuation.resume(throwing: error) + } + } + } + pendingMessages.removeAll() + } + + private func send(_ message: String) async throws { + let content = ModelContent(role: "user", parts: [message]) + let clientContent = BidiGenerateContentClientContent(turns: [content], turnComplete: true) + let clientMessage = BidiGenerateContentClientMessage.clientContent(clientContent) + let clientMessageData = try jsonEncoder.encode(clientMessage) + let clientMessageJSON = String(data: clientMessageData, encoding: .utf8) + print("Client Message JSON: \(clientMessageJSON)") + try await webSocket.send(.data(clientMessageData)) + setReceiveHandler() + } + + public func sendMessage(_ message: String) async throws { + if state == .ready { + try await send(message) + } else { + try await withCheckedThrowingContinuation { continuation in + pendingMessages.append((message, continuation)) + } + } + } + + public func urlSession(_ session: URLSession, + webSocketTask: URLSessionWebSocketTask, + didOpenWithProtocol protocol: String?) { + print("Web Socket opened.") + + guard state == .connecting else { + print("Web socket is not in a valid state to be opened: \(state)") + return + } + + do { + let setup = BidiGenerateContentSetup( + model: modelResourceName, generationConfig: generationConfig + ) + let message = BidiGenerateContentClientMessage.setup(setup) + let messageData = try jsonEncoder.encode(message) + let messageJSON = String(data: messageData, encoding: .utf8) + print("JSON: \(messageJSON)") + webSocketTask.send(.data(messageData)) { error in + if let error { + print("Send Error: \(error)") + self.state = .closed + self.failPendingMessages(with: error) + return + } + + self.state = .setupSent + self.setReceiveHandler() + } + } catch { + print(error) + state = .closed + failPendingMessages(with: error) + } + } + + public func urlSession(_ session: URLSession, + webSocketTask: URLSessionWebSocketTask, + didCloseWith closeCode: URLSessionWebSocketTask.CloseCode, + reason: Data?) 
{ + print("Web Socket closed.") + state = .closed + failPendingMessages(with: WebSocketError.connectionClosed) + } + + func setReceiveHandler() { + guard state == .setupSent || state == .ready else { + print("Web socket is not in a valid state to receive messages: \(state)") + return + } + + webSocket.receive { result in + do { + let message = try result.get() + switch message { + case let .string(string): + print("Unexpected string response: \(string)") + self.setReceiveHandler() + case let .data(data): + let response = try self.jsonDecoder.decode( + BidiGenerateContentServerMessage.self, + from: data + ) + let responseJSON = String(data: data, encoding: .utf8) + + switch response.messageType { + case .setupComplete: + print("Setup Complete: \(responseJSON)") + self.state = .ready + self.processPendingMessages() + case .serverContent: + // TODO: Return the serverContent to the developer + print("Server Content: \(responseJSON)") + case .toolCall: + // TODO: Tool calls not yet implemented + print("Tool Call: \(responseJSON)") + case .toolCallCancellation: + // TODO: Tool call cancellation not yet implemented + print("Tool Call Cancellation: \(responseJSON)") + case let .goAway(goAway): + if let timeLeft = goAway.timeLeft { + print("Server will disconnect in \(timeLeft) seconds.") + } else { + print("Server will disconnect soon.") + } + } + + if self.state == .closed { + print("Web socket is closed, not listening for more messages.") + } else { + self.setReceiveHandler() + } + @unknown default: + print("Unknown message received") + self.setReceiveHandler() + } + } catch { + // handle the error + print(error) + self.state = .closed + } + } + } +} From 05fc2ff2a48933dec460fc41ea6b072b4fceb6b3 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:19:25 -0400 Subject: [PATCH 07/13] Add `liveModel` static method to construct `LiveGenerativeModel` --- FirebaseAI/Sources/FirebaseAI.swift | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/FirebaseAI/Sources/FirebaseAI.swift b/FirebaseAI/Sources/FirebaseAI.swift index 48f7183d4e6..6730bada5b6 100644 --- a/FirebaseAI/Sources/FirebaseAI.swift +++ b/FirebaseAI/Sources/FirebaseAI.swift @@ -130,6 +130,18 @@ public final class FirebaseAI: Sendable { ) } + public func liveModel(modelName: String, + generationConfig: LiveGenerationConfig? = nil, + requestOptions: RequestOptions = RequestOptions()) -> LiveGenerativeModel { + return LiveGenerativeModel( + modelResourceName: modelResourceName(modelName: modelName), + firebaseInfo: firebaseInfo, + apiConfig: apiConfig, + generationConfig: generationConfig, + requestOptions: requestOptions + ) + } + /// Class to enable FirebaseAI to register via the Objective-C based Firebase component system /// to include FirebaseAI in the userAgent. 
@objc(FIRVertexAIComponent) class FirebaseVertexAIComponent: NSObject {} From e78343163b9a1f846fdfb09c0dd7395026031d59 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:57:33 -0400 Subject: [PATCH 08/13] Emit `responses` from `LiveSession` --- .../Live/BidiGenerateContentServerMessage.swift | 6 ++++-- .../Sources/Types/Public/Live/LiveSession.swift | 14 +++++++++++++- 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index cb3b5e0e4e3..9270bb5c7c3 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,7 +15,9 @@ import Foundation /// Response message for BidiGenerateContent RPC call. -struct BidiGenerateContentServerMessage { +public struct BidiGenerateContentServerMessage { + // TODO: Make this type `internal` + /// The type of the message. enum MessageType { /// Sent in response to a `BidiGenerateContentSetup` message from the client. @@ -56,7 +58,7 @@ extension BidiGenerateContentServerMessage: Decodable { case usageMetadata } - init(from decoder: any Decoder) throws { + public init(from decoder: any Decoder) throws { let container = try decoder.container(keyedBy: CodingKeys.self) if let setupComplete = try container.decodeIfPresent( diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index fdefd9b5b4e..dfab4734ae6 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -32,11 +32,18 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio let generationConfig: LiveGenerationConfig? let webSocket: URLSessionWebSocketTask + // TODO: Refactor this property, potentially returning responses after `connect`. + public let responses: AsyncThrowingStream + private var state: State = .notConnected private var pendingMessages: [(String, CheckedContinuation)] = [] private let jsonEncoder = JSONEncoder() private let jsonDecoder = JSONDecoder() + // TODO: Properly wrap callback code using `withCheckedContinuation` or similar. 
+ private let responseContinuation: AsyncThrowingStream + .Continuation + init(modelResourceName: String, generationConfig: LiveGenerationConfig?, url: URL, @@ -44,6 +51,7 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio self.modelResourceName = modelResourceName self.generationConfig = generationConfig webSocket = urlSession.webSocketTask(with: url) + (responses, responseContinuation) = AsyncThrowingStream.makeStream() } func open() async throws { @@ -64,6 +72,7 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio continuation.resume(throwing: error) } pendingMessages.removeAll() + responseContinuation.finish(throwing: error) } private func processPendingMessages() { @@ -144,6 +153,7 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio print("Web Socket closed.") state = .closed failPendingMessages(with: WebSocketError.connectionClosed) + responseContinuation.finish() } func setReceiveHandler() { @@ -172,7 +182,6 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio self.state = .ready self.processPendingMessages() case .serverContent: - // TODO: Return the serverContent to the developer print("Server Content: \(responseJSON)") case .toolCall: // TODO: Tool calls not yet implemented @@ -188,6 +197,8 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio } } + self.responseContinuation.yield(response) + if self.state == .closed { print("Web socket is closed, not listening for more messages.") } else { @@ -201,6 +212,7 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio // handle the error print(error) self.state = .closed + self.responseContinuation.finish(throwing: error) } } } From 1575cc2741a11a4a4f24410f5202fdb829731690 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 13:59:53 -0400 Subject: [PATCH 09/13] Temporarily display text Bidi responses in TestApp --- .../Tests/TestApp/Sources/ContentView.swift | 31 ++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) diff --git a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift index 52af5939455..56631916627 100644 --- a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift +++ b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift @@ -12,17 +12,40 @@ // See the License for the specific language governing permissions and // limitations under the License. +import FirebaseAI import SwiftUI struct ContentView: View { + // TODO: Revert changes in this file. For prototyping purposes only. 
+ let liveModel: LiveGenerativeModel = { + // let firebaseAI = FirebaseAI.firebaseAI(backend: .vertexAI()) + let firebaseAI = FirebaseAI.firebaseAI() + return firebaseAI.liveModel( + modelName: "gemini-2.0-flash-live-001", + generationConfig: LiveGenerationConfig(responseModalities: [.text]) + ) + }() + + @State private var responses: [String] = [] + var body: some View { VStack { - Image(systemName: "globe") - .imageScale(.large) - .foregroundStyle(.tint) - Text("Hello, world!") + List(responses, id: \.self) { + Text($0) + } } .padding() + .task { + do { + let liveSession = try await liveModel.connect() + try await liveSession.sendMessage("Why is the sky blue?") + for try await response in liveSession.responses { + responses.append(String(describing: response)) + } + } catch { + print(error) + } + } } } From 8d9e758a1caf0c2bedf2a34f58b500212fee5143 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 15:51:08 -0400 Subject: [PATCH 10/13] Refactor to use async/await and remove `URLSessionWebSocketDelegate` --- .../BidiGenerateContentServerMessage.swift | 2 +- .../Public/Live/LiveGenerativeModel.swift | 4 +- .../Types/Public/Live/LiveSession.swift | 184 +++--------------- .../Tests/TestApp/Sources/ContentView.swift | 2 +- 4 files changed, 35 insertions(+), 157 deletions(-) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 9270bb5c7c3..761739afabd 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,7 +15,7 @@ import Foundation /// Response message for BidiGenerateContent RPC call. -public struct BidiGenerateContentServerMessage { +public struct BidiGenerateContentServerMessage: Sendable { // TODO: Make this type `internal` /// The type of the message. diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift index 08648fe4e5f..689e690a631 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveGenerativeModel.swift @@ -40,7 +40,7 @@ public final class LiveGenerativeModel { self.urlSession = urlSession } - public func connect() async throws -> LiveSession { + public func connect() -> LiveSession { let liveSession = LiveSession( modelResourceName: modelResourceName, generationConfig: generationConfig, @@ -48,7 +48,7 @@ public final class LiveGenerativeModel { urlSession: urlSession ) print("Opening Live Session...") - try await liveSession.open() + liveSession.openConnection() return liveSession } diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index dfab4734ae6..641c93b4b06 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -15,35 +15,18 @@ import Foundation // TODO: Extract most of this file into a service class similar to `GenerativeAIService`. 
-public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessionTaskDelegate { - private enum State { - case notConnected - case connecting - case setupSent - case ready - case closed - } - - private enum WebSocketError: Error { - case connectionClosed - } - +public final class LiveSession: Sendable { let modelResourceName: String let generationConfig: LiveGenerationConfig? let webSocket: URLSessionWebSocketTask - // TODO: Refactor this property, potentially returning responses after `connect`. public let responses: AsyncThrowingStream + private let responseContinuation: AsyncThrowingStream + .Continuation - private var state: State = .notConnected - private var pendingMessages: [(String, CheckedContinuation)] = [] private let jsonEncoder = JSONEncoder() private let jsonDecoder = JSONDecoder() - // TODO: Properly wrap callback code using `withCheckedContinuation` or similar. - private let responseContinuation: AsyncThrowingStream - .Continuation - init(modelResourceName: String, generationConfig: LiveGenerationConfig?, url: URL, @@ -54,166 +37,61 @@ public final class LiveSession: NSObject, URLSessionWebSocketDelegate, URLSessio (responses, responseContinuation) = AsyncThrowingStream.makeStream() } - func open() async throws { - guard state == .notConnected else { - print("Web socket is not in a valid state to be opened: \(state)") - return - } - - state = .connecting - webSocket.delegate = self - webSocket.resume() - - print("Opening websocket") - } - - private func failPendingMessages(with error: Error) { - for (_, continuation) in pendingMessages { - continuation.resume(throwing: error) - } - pendingMessages.removeAll() - responseContinuation.finish(throwing: error) + deinit { + webSocket.cancel(with: .goingAway, reason: nil) } - private func processPendingMessages() { - for (message, continuation) in pendingMessages { - Task { - do { - try await send(message) - continuation.resume() - } catch { - continuation.resume(throwing: error) - } - } - } - pendingMessages.removeAll() - } - - private func send(_ message: String) async throws { + public func sendMessage(_ message: String) async throws { let content = ModelContent(role: "user", parts: [message]) let clientContent = BidiGenerateContentClientContent(turns: [content], turnComplete: true) let clientMessage = BidiGenerateContentClientMessage.clientContent(clientContent) let clientMessageData = try jsonEncoder.encode(clientMessage) - let clientMessageJSON = String(data: clientMessageData, encoding: .utf8) - print("Client Message JSON: \(clientMessageJSON)") try await webSocket.send(.data(clientMessageData)) - setReceiveHandler() } - public func sendMessage(_ message: String) async throws { - if state == .ready { - try await send(message) - } else { - try await withCheckedThrowingContinuation { continuation in - pendingMessages.append((message, continuation)) - } + func openConnection() { + webSocket.resume() + // TODO: Verify that this task gets cancelled on deinit + Task { + await startEventLoop() } } - public func urlSession(_ session: URLSession, - webSocketTask: URLSessionWebSocketTask, - didOpenWithProtocol protocol: String?) 
{ - print("Web Socket opened.") - - guard state == .connecting else { - print("Web socket is not in a valid state to be opened: \(state)") - return + private func startEventLoop() async { + defer { + webSocket.cancel(with: .goingAway, reason: nil) } do { - let setup = BidiGenerateContentSetup( - model: modelResourceName, generationConfig: generationConfig - ) - let message = BidiGenerateContentClientMessage.setup(setup) - let messageData = try jsonEncoder.encode(message) - let messageJSON = String(data: messageData, encoding: .utf8) - print("JSON: \(messageJSON)") - webSocketTask.send(.data(messageData)) { error in - if let error { - print("Send Error: \(error)") - self.state = .closed - self.failPendingMessages(with: error) - return - } - - self.state = .setupSent - self.setReceiveHandler() - } - } catch { - print(error) - state = .closed - failPendingMessages(with: error) - } - } - - public func urlSession(_ session: URLSession, - webSocketTask: URLSessionWebSocketTask, - didCloseWith closeCode: URLSessionWebSocketTask.CloseCode, - reason: Data?) { - print("Web Socket closed.") - state = .closed - failPendingMessages(with: WebSocketError.connectionClosed) - responseContinuation.finish() - } - - func setReceiveHandler() { - guard state == .setupSent || state == .ready else { - print("Web socket is not in a valid state to receive messages: \(state)") - return - } + try await sendSetupMessage() - webSocket.receive { result in - do { - let message = try result.get() + while !Task.isCancelled { + let message = try await webSocket.receive() switch message { case let .string(string): print("Unexpected string response: \(string)") - self.setReceiveHandler() case let .data(data): - let response = try self.jsonDecoder.decode( + let response = try jsonDecoder.decode( BidiGenerateContentServerMessage.self, from: data ) - let responseJSON = String(data: data, encoding: .utf8) - - switch response.messageType { - case .setupComplete: - print("Setup Complete: \(responseJSON)") - self.state = .ready - self.processPendingMessages() - case .serverContent: - print("Server Content: \(responseJSON)") - case .toolCall: - // TODO: Tool calls not yet implemented - print("Tool Call: \(responseJSON)") - case .toolCallCancellation: - // TODO: Tool call cancellation not yet implemented - print("Tool Call Cancellation: \(responseJSON)") - case let .goAway(goAway): - if let timeLeft = goAway.timeLeft { - print("Server will disconnect in \(timeLeft) seconds.") - } else { - print("Server will disconnect soon.") - } - } - - self.responseContinuation.yield(response) - - if self.state == .closed { - print("Web socket is closed, not listening for more messages.") - } else { - self.setReceiveHandler() - } + responseContinuation.yield(response) @unknown default: print("Unknown message received") - self.setReceiveHandler() } - } catch { - // handle the error - print(error) - self.state = .closed - self.responseContinuation.finish(throwing: error) } + } catch { + responseContinuation.finish(throwing: error) } + responseContinuation.finish() + } + + private func sendSetupMessage() async throws { + let setup = BidiGenerateContentSetup( + model: modelResourceName, generationConfig: generationConfig + ) + let message = BidiGenerateContentClientMessage.setup(setup) + let messageData = try jsonEncoder.encode(message) + try await webSocket.send(.data(messageData)) } } diff --git a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift index 56631916627..37ef5fd527a 100644 --- 
a/FirebaseAI/Tests/TestApp/Sources/ContentView.swift +++ b/FirebaseAI/Tests/TestApp/Sources/ContentView.swift @@ -37,7 +37,7 @@ struct ContentView: View { .padding() .task { do { - let liveSession = try await liveModel.connect() + let liveSession = liveModel.connect() try await liveSession.sendMessage("Why is the sky blue?") for try await response in liveSession.responses { responses.append(String(describing: response)) From 87540eee6e17d97d3643b024b1428ebaebb77455 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Sun, 10 Aug 2025 15:56:03 -0400 Subject: [PATCH 11/13] Add platform availability annotations --- .../Types/Internal/Live/BidiGenerateContentClientContent.swift | 1 + .../Types/Internal/Live/BidiGenerateContentClientMessage.swift | 2 ++ .../Types/Internal/Live/BidiGenerateContentRealtimeInput.swift | 1 + .../Types/Internal/Live/BidiGenerateContentServerContent.swift | 1 + .../Types/Internal/Live/BidiGenerateContentServerMessage.swift | 1 + .../Sources/Types/Internal/Live/BidiGenerateContentSetup.swift | 1 + .../Types/Internal/Live/BidiGenerateContentSetupComplete.swift | 1 + .../Types/Internal/Live/BidiGenerateContentToolCall.swift | 1 + .../Internal/Live/BidiGenerateContentToolCallCancellation.swift | 1 + .../Types/Internal/Live/BidiGenerateContentToolResponse.swift | 1 + FirebaseAI/Sources/Types/Internal/Live/GoAway.swift | 1 + .../Sources/Types/Internal/Live/RealtimeInputConfig.swift | 1 + FirebaseAI/Sources/Types/Public/Live/LiveSession.swift | 1 + 13 files changed, 14 insertions(+) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift index 91fed495ac5..a24944d83fd 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientContent.swift @@ -19,6 +19,7 @@ import Foundation /// history and used as part of the prompt to the model to generate content. /// /// A message here will interrupt any current model generation. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentClientContent: Encodable { /// The content appended to the current conversation with the model. /// diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift index 147e986e863..d4e47982af1 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentClientMessage.swift @@ -15,6 +15,7 @@ import Foundation /// Messages sent by the client in the BidiGenerateContent RPC call. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) enum BidiGenerateContentClientMessage { /// Message to be sent in the first and only first client message. 
case setup(BidiGenerateContentSetup) @@ -29,6 +30,7 @@ enum BidiGenerateContentClientMessage { case toolResponse(BidiGenerateContentToolResponse) } +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) extension BidiGenerateContentClientMessage: Encodable { enum CodingKeys: CodingKey { case setup diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift index 26a9f84d8d7..8f57b8875f2 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift @@ -28,6 +28,7 @@ import Foundation /// to optimize for a fast start of the response from the model. /// - Is always assumed to be the user's input (cannot be used to populate /// conversation history). +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentRealtimeInput: Encodable { /// These form the realtime audio input stream. let audio: Data? diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift index f09ec48a303..8d9d1e8940b 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerContent.swift @@ -19,6 +19,7 @@ import Foundation /// /// Content is generated as quickly as possible, and not in realtime. Clients /// may choose to buffer and play it out in realtime. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentServerContent: Decodable { /// The content that the model has generated as part of the current /// conversation with the user. diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 761739afabd..7b0ce692db4 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -15,6 +15,7 @@ import Foundation /// Response message for BidiGenerateContent RPC call. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public struct BidiGenerateContentServerMessage: Sendable { // TODO: Make this type `internal` diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift index 2744950d68f..5541b7c107a 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetup.swift @@ -20,6 +20,7 @@ import Foundation /// /// Clients should wait for a `BidiGenerateContentSetupComplete` message before /// sending any additional messages. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentSetup: Encodable { /// The fully qualified name of the publisher model. 
/// diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift index a2b02c0caf2..cbf1dc6d960 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentSetupComplete.swift @@ -15,4 +15,5 @@ import Foundation /// Sent in response to a `BidiGenerateContentSetup` message from the client. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentSetupComplete: Decodable {} diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift index e53decadfab..86ded221fc3 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCall.swift @@ -16,6 +16,7 @@ import Foundation /// Request for the client to execute the `function_calls` and return the /// responses with the matching `id`s. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentToolCall: Decodable { /// The function call to be executed. let functionCalls: [FunctionCall]? diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift index fb25fd9f330..096e8a1a11e 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolCallCancellation.swift @@ -19,6 +19,7 @@ import Foundation /// cancelled. If there were side-effects to those tool calls, clients may /// attempt to undo the tool calls. This message occurs only in cases where the /// clients interrupt server turns. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentToolCallCancellation: Decodable { /// The ids of the tool calls to be cancelled. let ids: [String]? diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift index 245f2668a0e..8b4e4ba48b2 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentToolResponse.swift @@ -22,6 +22,7 @@ import Foundation /// calling happens by exchanging the `Content` parts, while in the bidi /// GenerateContent APIs function calling happens over these dedicated set of /// messages. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct BidiGenerateContentToolResponse: Encodable { /// The response to the function calls. let functionResponses: [FunctionResponse]? diff --git a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift index 729d86c6cfd..45a2a7e944d 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/GoAway.swift @@ -15,6 +15,7 @@ import Foundation /// Server will not be able to service client soon. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct GoAway: Decodable { /// The remaining time before the connection will be terminated as ABORTED. 
/// The minimal time returned here is specified differently together with diff --git a/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift b/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift index 8ebade9b98b..08bcfe076f8 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/RealtimeInputConfig.swift @@ -15,6 +15,7 @@ import Foundation /// Configures the realtime input behavior in `BidiGenerateContent`. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) struct RealtimeInputConfig: Encodable { /// Configures automatic detection of activity. struct AutomaticActivityDetection: Encodable { diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift index 641c93b4b06..63542320236 100644 --- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift +++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift @@ -15,6 +15,7 @@ import Foundation // TODO: Extract most of this file into a service class similar to `GenerativeAIService`. +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) public final class LiveSession: Sendable { let modelResourceName: String let generationConfig: LiveGenerationConfig? From 0db90fabb14230621e15667ed24a789e1878afad Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Mon, 11 Aug 2025 10:04:45 -0400 Subject: [PATCH 12/13] Add `BidiGenerateContentServerMessage` availability annotation --- .../Types/Internal/Live/BidiGenerateContentServerMessage.swift | 1 + 1 file changed, 1 insertion(+) diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift index 7b0ce692db4..950819e0343 100644 --- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift +++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentServerMessage.swift @@ -49,6 +49,7 @@ public struct BidiGenerateContentServerMessage: Sendable { // MARK: - Decodable +@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *) extension BidiGenerateContentServerMessage: Decodable { enum CodingKeys: String, CodingKey { case setupComplete From 7c488a15c1aee17a41b5b6f33a194a39ed956b72 Mon Sep 17 00:00:00 2001 From: Andrew Heard Date: Mon, 11 Aug 2025 23:29:46 -0400 Subject: [PATCH 13/13] Add `AsyncWebSocket` wrapper for `URLSessionWebSocketTask` --- .../Types/Internal/Live/AsyncWebSocket.swift | 107 ++++++++++++++++++ .../BidiGenerateContentRealtimeInput.swift | 2 +- .../Types/Public/Live/LiveSession.swift | 55 ++++----- 3 files changed, 130 insertions(+), 34 deletions(-) create mode 100644 FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift diff --git a/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift new file mode 100644 index 00000000000..6a1da33241f --- /dev/null +++ b/FirebaseAI/Sources/Types/Internal/Live/AsyncWebSocket.swift @@ -0,0 +1,107 @@ +// Copyright 2025 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import Foundation
+
+final class AsyncWebSocket: NSObject, @unchecked Sendable, URLSessionWebSocketDelegate {
+  private let webSocketTask: URLSessionWebSocketTask
+  private let stream: AsyncThrowingStream<URLSessionWebSocketTask.Message, Error>
+  private let continuation: AsyncThrowingStream<URLSessionWebSocketTask.Message, Error>.Continuation
+  private var continuationFinished = false
+  private let continuationLock = NSLock()
+
+  private var _isConnected = false
+  private let isConnectedLock = NSLock()
+  private(set) var isConnected: Bool {
+    get { isConnectedLock.withLock { _isConnected } }
+    set { isConnectedLock.withLock { _isConnected = newValue } }
+  }
+
+  init(urlSession: URLSession = GenAIURLSession.default, urlRequest: URLRequest) {
+    webSocketTask = urlSession.webSocketTask(with: urlRequest)
+    (stream, continuation) = AsyncThrowingStream<URLSessionWebSocketTask.Message, Error>
+      .makeStream()
+  }
+
+  deinit {
+    webSocketTask.cancel(with: .goingAway, reason: nil)
+  }
+
+  func connect() -> AsyncThrowingStream<URLSessionWebSocketTask.Message, Error> {
+    webSocketTask.resume()
+    isConnected = true
+    startReceiving()
+    return stream
+  }
+
+  func disconnect() {
+    webSocketTask.cancel(with: .goingAway, reason: nil)
+    isConnected = false
+    continuationLock.withLock {
+      self.continuation.finish()
+      self.continuationFinished = true
+    }
+  }
+
+  func send(_ message: URLSessionWebSocketTask.Message) async throws {
+    // TODO: Throw error if socket already closed
+    try await webSocketTask.send(message)
+  }
+
+  private func startReceiving() {
+    Task {
+      while !Task.isCancelled && self.webSocketTask.isOpen && self.isConnected {
+        let message = try await webSocketTask.receive()
+        // TODO: Check continuationFinished before yielding. Use the same thread for NSLock.
+        continuation.yield(message)
+      }
+    }
+  }
+
+  func urlSession(_ session: URLSession,
+                  webSocketTask: URLSessionWebSocketTask,
+                  didCloseWith closeCode: URLSessionWebSocketTask.CloseCode,
+                  reason: Data?) {
+    continuationLock.withLock {
+      guard !continuationFinished else { return }
+      continuation.finish()
+      continuationFinished = true
+    }
+  }
+}
+
+private extension URLSessionWebSocketTask {
+  var isOpen: Bool {
+    return closeCode == .invalid
+  }
+}
+
+struct WebSocketClosedError: Error, Sendable, CustomNSError {
+  let closeCode: URLSessionWebSocketTask.CloseCode
+  let closeReason: String
+
+  init(closeCode: URLSessionWebSocketTask.CloseCode, closeReason: Data?) {
+    self.closeCode = closeCode
+    self.closeReason = closeReason
+      .flatMap { String(data: $0, encoding: .utf8) } ?? "Unknown reason."
+  }
+
+  var errorCode: Int { closeCode.rawValue }
+
+  var errorUserInfo: [String: Any] {
+    [
+      NSLocalizedDescriptionKey: "WebSocket closed with code \(closeCode.rawValue). Reason: \(closeReason)",
+    ]
+  }
+}
diff --git a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift
index 8f57b8875f2..3849b10c561 100644
--- a/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift
+++ b/FirebaseAI/Sources/Types/Internal/Live/BidiGenerateContentRealtimeInput.swift
@@ -31,7 +31,7 @@ import Foundation
 @available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
 struct BidiGenerateContentRealtimeInput: Encodable {
   /// These form the realtime audio input stream.
-  let audio: Data?
+  let audio: InlineData?
 
   /// Indicates that the audio stream has ended, e.g. because the microphone was
   /// turned off.
diff --git a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift
index 63542320236..edf248c440e 100644
--- a/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift
+++ b/FirebaseAI/Sources/Types/Public/Live/LiveSession.swift
@@ -19,7 +19,7 @@ import Foundation
 public final class LiveSession: Sendable {
   let modelResourceName: String
   let generationConfig: LiveGenerationConfig?
-  let webSocket: URLSessionWebSocketTask
+  let webSocket: AsyncWebSocket
 
   public let responses: AsyncThrowingStream<BidiGenerateContentServerMessage, Error>
   private let responseContinuation: AsyncThrowingStream<BidiGenerateContentServerMessage, Error>
@@ -34,12 +34,12 @@ public final class LiveSession: Sendable {
        urlSession: URLSession) {
     self.modelResourceName = modelResourceName
     self.generationConfig = generationConfig
-    webSocket = urlSession.webSocketTask(with: url)
+    webSocket = AsyncWebSocket(urlSession: urlSession, urlRequest: URLRequest(url: url))
     (responses, responseContinuation) = AsyncThrowingStream.makeStream()
   }
 
   deinit {
-    webSocket.cancel(with: .goingAway, reason: nil)
+    webSocket.disconnect()
   }
 
   public func sendMessage(_ message: String) async throws {
@@ -51,40 +51,29 @@ public final class LiveSession: Sendable {
   }
 
   func openConnection() {
-    webSocket.resume()
-    // TODO: Verify that this task gets cancelled on deinit
     Task {
-      await startEventLoop()
-    }
-  }
-
-  private func startEventLoop() async {
-    defer {
-      webSocket.cancel(with: .goingAway, reason: nil)
-    }
-
-    do {
-      try await sendSetupMessage()
-
-      while !Task.isCancelled {
-        let message = try await webSocket.receive()
-        switch message {
-        case let .string(string):
-          print("Unexpected string response: \(string)")
-        case let .data(data):
-          let response = try jsonDecoder.decode(
-            BidiGenerateContentServerMessage.self,
-            from: data
-          )
-          responseContinuation.yield(response)
-        @unknown default:
-          print("Unknown message received")
+      do {
+        let stream = webSocket.connect()
+        try await sendSetupMessage()
+        for try await message in stream {
+          switch message {
+          case let .string(string):
+            print("Unexpected string response: \(string)")
+          case let .data(data):
+            let response = try jsonDecoder.decode(
+              BidiGenerateContentServerMessage.self,
+              from: data
+            )
+            responseContinuation.yield(response)
+          @unknown default:
+            print("Unknown message received")
+          }
         }
+      } catch {
+        responseContinuation.finish(throwing: error)
       }
-    } catch {
-      responseContinuation.finish(throwing: error)
+      responseContinuation.finish()
     }
-    responseContinuation.finish()
   }
 
   private func sendSetupMessage() async throws {
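
For reference, a rough client-side sketch of how the Live API surface assembled by this series might be exercised, mirroring the temporary TestApp prototype. The model name, response modality, prompt, and the wrapper function `runLiveDemo` are illustrative placeholders, and printing each `BidiGenerateContentServerMessage` with `String(describing:)` reflects the prototype's placeholder output rather than a finished public API.

import FirebaseAI

// Sketch only: exercises the `liveModel`, `connect()`, `sendMessage(_:)`, and
// `responses` APIs introduced in this patch series.
@available(iOS 15.0, macOS 12.0, macCatalyst 15.0, tvOS 15.0, watchOS 8.0, *)
func runLiveDemo() async {
  // Model name and generation config mirror the TestApp prototype; adjust as needed.
  let model = FirebaseAI.firebaseAI().liveModel(
    modelName: "gemini-2.0-flash-live-001",
    generationConfig: LiveGenerationConfig(responseModalities: [.text])
  )

  // Opens the WebSocket and starts the internal receive loop.
  let session = model.connect()

  do {
    // Sends a user turn as client content with `turnComplete: true`.
    try await session.sendMessage("Why is the sky blue?")

    // Streams decoded `BidiGenerateContentServerMessage` values until the
    // connection closes or an error is thrown.
    for try await response in session.responses {
      print(String(describing: response))
    }
  } catch {
    print("Live session error: \(error)")
  }
}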